diff --git a/srfi-264-test.scm b/srfi-264-test.scm index f4d5791..cc72288 100644 --- a/srfi-264-test.scm +++ b/srfi-264-test.scm @@ -1,1469 +1,2803 @@ ;;; SPDX-FileCopyrightText: 2025 Sergei Egorov ;;; SPDX-License-Identifier: MIT -(import (scheme base) (srfi 64) (srfi 264)) - ; SRFI 264 Tests (borrowed from many sources) -(define (ssre->sre/opts s . o*) - (if (pair? o*) - (let* ((os (apply string-append (map symbol->string o*))) - (s (string-append "(?" os ")" s))) - (string-sre->sre s)) - (string-sre->sre s))) +(import (scheme base) (srfi 64) (srfi 264)) (define-syntax test-ssre (syntax-rules () - ((test-ssre pat o* res) - (test-equal 'res (apply ssre->sre/opts 'pat 'o*))))) + ((test-ssre pat res) + (test-equal 'res (ssre->sre 'pat))))) + +(define-syntax test-sre + (syntax-rules () + ((test-sre sre ssre) + (test-equal 'ssre (sre->ssre 'sre))))) + +; save default definitions +(define *ssre-definitions* (ssre-definitions)) + +; add some random definitions for the ssre tests +(ssre-definitions + (ssre-bind 'Any 'cset 'any + (ssre-bind 'Nd 'cset 'numeric + (ssre-bind 'vowel 'cset '(or #\a #\e #\i #\o #\u #\y #\w) + (ssre-bind 'Vowel 'cset '(or #\A #\E #\I #\O #\U #\Y #\W) + (ssre-bind 'L 'cset 'alpha + (ssre-bind 'Ll 'cset 'lower + (ssre-bind 'Lu 'cset 'upper + *ssre-definitions*)))))))) + +(test-begin "srfi-264") + +; NOTE: translations on the right are not the only correct ones; there can be equivalent translations, which are also correct -; add some random definitions to pass the tests -(string-sre-definitions - (string-sre-bind 'Any 'cset 'any - (string-sre-bind 'Nd 'cset 'numeric - (string-sre-bind 'vowel 'cset '(or #\a #\e #\i #\o #\u #\y #\w) - (string-sre-bind 'Vowel 'cset '(or #\A #\E #\I #\O #\U #\Y #\W) - (string-sre-bind 'L 'cset 'alpha - (string-sre-bind 'Ll 'cset 'lower - (string-sre-bind 'Lu 'cset 'upper - (string-sre-definitions))))))))) +(test-ssre "the quick brown fox" (: #\t #\h #\e #\space #\q #\u #\i #\c #\k #\space #\b #\r #\o #\w #\n #\space 
#\f #\o #\x)) +(test-ssre "a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz" (: (* #\a) #\a #\b (? #\c) #\x #\y (+ #\z) #\p #\q (= 3 #\r) #\a (>= 2 #\b) #\x (** 4 5 #\y) #\p (** 0 6 #\q) #\A (>= 0 #\B) #\z #\z)) +(test-ssre "^(abc){1,2}zz" (: bos (** 1 2 ($ (: #\a #\b #\c))) #\z #\z)) +(test-ssre "^(b+?|a){1,2}?c" (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) +(test-ssre "^(b+|a){1,2}c" (: bos (** 1 2 ($ (or (+ #\b) #\a))) #\c)) +(test-ssre "^(ba|b*){1,2}?bc" (: bos (**? 1 2 ($ (or (: #\b #\a) (* #\b)))) #\b #\c)) +(test-ssre "^[ab\\]cde]" (: bos (or #\a #\b #\] #\c #\d #\e))) +(test-ssre "^[]cde]" (: bos (or #\] #\c #\d #\e))) +(test-ssre "^[^ab\\]cde]" (: bos (~ (or #\a #\b #\] #\c #\d #\e)))) +(test-ssre "^[^]cde]" (: bos (~ (or #\] #\c #\d #\e)))) +(test-ssre "^@" (: bos #\@)) +(test-ssre "^[0-9]+$" (: bos (+ (char-range #\0 #\9)) eos)) +(test-ssre "^.*nter" (: bos (* nonl) #\n #\t #\e #\r)) +(test-ssre "^xxx[0-9]+$" (: bos #\x #\x #\x (+ (char-range #\0 #\9)) eos)) +(test-ssre "^.+[0-9][0-9][0-9]$" (: bos (+ nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) +(test-ssre "^.+?[0-9][0-9][0-9]$" (: bos (**? 1 #f nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) +(test-ssre "^([^!]+)!(.+)=apquxz\\.ixr\\.zzz\\.ac\\.uk$" (: bos ($ (+ (~ #\!))) #\! ($ (+ nonl)) #\= #\a #\p #\q #\u #\x #\z #\. #\i #\x #\r #\. #\z #\z #\z #\. #\a #\c #\. #\u #\k eos)) +(test-ssre ":" #\:) +(test-ssre "([\\da-f:]+)$" (: ($ (+ (or numeric (char-range #\a #\f) #\:))) eos)) +(test-ssre "^.*\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$" (: bos (* nonl) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) #\. 
($ (** 1 3 numeric)) eos)) +(test-ssre "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$" (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos)) +(test-ssre "^[a-zA-Z\\d][a-zA-Z\\d\\-]*(\\.[a-zA-Z\\d][a-zA-Z\\d\\-]*)*\\.$" (: bos (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-)) (* ($ (: #\. (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-))))) #\. eos)) +(test-ssre "^\\*\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?(\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?)*$" (: bos #\* #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric))))) (* ($ (: #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric)))))))) eos)) +(test-ssre "^(?=ab(de))(abd)(e)" (: bos (look-ahead (: #\a #\b ($ (: #\d #\e)))) ($ (: #\a #\b #\d)) ($ #\e))) +(test-ssre "^(?!(ab)de|x)(abd)(f)" (: bos (neg-look-ahead (or (: ($ (: #\a #\b)) #\d #\e) #\x)) ($ (: #\a #\b #\d)) ($ #\f))) +(test-ssre "^(?=(ab(cd)))(ab)" (: bos (look-ahead ($ (: #\a #\b ($ (: #\c #\d))))) ($ (: #\a #\b)))) +(test-ssre "^[\\da-f](\\.[\\da-f])*$" (: bos (or numeric (char-range #\a #\f)) (* ($ (: #\. (or numeric (char-range #\a #\f))))) eos)) +(test-ssre "^\".*\"\\s*(;.*)?$" (: bos #\" (* nonl) #\" (* space) (? 
($ (: #\; (* nonl)))) eos)) +(test-ssre "^$" (: bos eos)) +(test-ssre "(?x)^ a\\ b[c ]d $" (: bos #\a #\space #\b (or #\c #\space) #\d eos)) +(test-ssre "^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$" (: bos ($ (: #\a ($ (: #\b ($ #\c))))) ($ (: #\d ($ (: #\e ($ #\f))))) ($ (: #\h ($ (: #\i ($ #\j))))) ($ (: #\k ($ (: #\l ($ #\m))))) eos)) +(test-ssre "^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$" (: bos (: #\a ($ (: #\b ($ #\c)))) (: #\d ($ (: #\e ($ #\f)))) (: #\h ($ (: #\i ($ #\j)))) (: #\k ($ (: #\l ($ #\m)))) eos)) +(test-ssre "^[\\w][\\W][\\s][\\S][\\d][\\D]\\]" (: bos (or alnum #\_) (~ (or alnum #\_)) space (~ space) numeric (~ numeric) #\])) +(test-ssre "^[.^$|()*+?{,}]+" (: bos (+ (or #\. #\^ #\$ #\| #\( #\) #\* #\+ #\? #\{ #\, #\})))) +(test-ssre "^a*\\w" (: bos (* #\a) (or alnum #\_))) +(test-ssre "^a*?\\w" (: bos (*? #\a) (or alnum #\_))) +(test-ssre "^a+\\w" (: bos (+ #\a) (or alnum #\_))) +(test-ssre "^a+?\\w" (: bos (**? 1 #f #\a) (or alnum #\_))) +(test-ssre "^\\d{8}\\w{2,}" (: bos (= 8 numeric) (>= 2 (or alnum #\_)))) +(test-ssre "^[aeiou\\d]{4,5}$" (: bos (** 4 5 (or #\a #\e #\i #\o #\u numeric)) eos)) +(test-ssre "^[aeiou\\d]{4,5}?" (: bos (**? 4 5 (or #\a #\e #\i #\o #\u numeric)))) +(test-ssre "^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]" (: bos #\F #\r #\o #\m (+ #\space) ($ (+ (~ #\space))) (+ #\space) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (+ #\space) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (+ #\space) (? 
(char-range #\0 #\9)) (char-range #\0 #\9) (+ #\space) (char-range #\0 #\9) (char-range #\0 #\9) #\: (char-range #\0 #\9) (char-range #\0 #\9))) +(test-ssre "^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" (: bos #\F #\r #\o #\m (+ space) (+ (~ space)) (+ space) (= 2 ($ (: (= 3 (or (char-range #\a #\z) (char-range #\A #\Z))) (+ space)))) (** 1 2 numeric) (+ space) numeric numeric #\: numeric numeric)) +(test-ssre "^12.34" (: bos #\1 #\2 nonl #\3 #\4)) +(test-ssre "foo(?!bar)(.*)" (: #\f #\o #\o (neg-look-ahead (: #\b #\a #\r)) ($ (* nonl)))) +(test-ssre "(?:(?!foo)...|^.{0,2})bar(.*)" (: (or (: (neg-look-ahead (: #\f #\o #\o)) nonl nonl nonl) (: bos (** 0 2 nonl))) #\b #\a #\r ($ (* nonl)))) +(test-ssre "^(\\D*)(?=\\d)(?!123)" (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) +(test-ssre "(?!^)abc" (: (neg-look-ahead bos) #\a #\b #\c)) +(test-ssre "(?=^)abc" (: (look-ahead bos) #\a #\b #\c)) +(test-ssre "^[ab]{1,3}(ab*|b)" (: bos (** 1 3 (or #\a #\b)) ($ (or (: #\a (* #\b)) #\b)))) +(test-ssre "^[ab]{1,3}?(ab*|b)" (: bos (**? 1 3 (or #\a #\b)) ($ (or (: #\a (* #\b)) #\b)))) +(test-ssre "^[ab]{1,3}?(ab*?|b)" (: bos (**? 1 3 (or #\a #\b)) ($ (or (: #\a (*? #\b)) #\b)))) +(test-ssre "^[ab]{1,3}(ab*?|b)" (: bos (** 1 3 (or #\a #\b)) ($ (or (: #\a (*? #\b)) #\b)))) +(test-ssre "ab{1,3}bc" (: #\a (** 1 3 #\b) #\b #\c)) +(test-ssre "([^.]*)\\.([^:]*):[T ]+(.*)" (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* nonl)))) +(test-ssre "^[W-c]+$" (: bos (+ (char-range #\W #\c)) eos)) +(test-ssre "^[?-_]+$" (: bos (+ (char-range #\? 
#\_)) eos)) +(test-ssre "^abc$" (: bos #\a #\b #\c eos)) +(test-ssre "\\Aabc\\z" (: bos #\a #\b #\c eos)) +(test-ssre "\\A(.)*\\z" (: bos (* ($ nonl)) eos)) +(test-ssre "(?:b)|(?::+)" (or #\b (+ #\:))) +(test-ssre "[-az]+" (+ (or #\- #\a #\z))) +(test-ssre "[az-]+" (+ (or #\a #\z #\-))) +(test-ssre "[a\\-z]+" (+ (or #\a #\- #\z))) +(test-ssre "[a-z]+" (+ (char-range #\a #\z))) +(test-ssre "[\\d-]+" (+ (or numeric #\-))) +(test-ssre "\\\\" #\\) +(test-ssre "a{0}bc" (: (= 0 #\a) #\b #\c)) +(test-ssre "(a|(bc)){0,0}?xyz" (: (**? 0 0 ($ (or #\a ($ (: #\b #\c))))) #\x #\y #\z)) +(test-ssre "^([^a])([^b])([^c]*)([^d]{3,4})" (: bos ($ (~ #\a)) ($ (~ #\b)) ($ (* (~ #\c))) ($ (** 3 4 (~ #\d))))) +(test-ssre "[^a]" (~ #\a)) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "[^k]$" (: (~ #\k) eos)) +(test-ssre "[^k]{2,3}$" (: (** 2 3 (~ #\k)) eos)) +(test-ssre "^\\d{8,}@.+[^k]$" (: bos (>= 8 numeric) #\@ (+ nonl) (~ #\k) eos)) +(test-ssre "[^a]" (~ #\a)) +(test-ssre "[^az]" (~ (or #\a #\z))) +(test-ssre "P[^*]TAIRE[^*]{1,6}?LL" (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 6 (~ #\*)) #\L #\L)) +(test-ssre "P[^*]TAIRE[^*]{1,}?LL" (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 #f (~ #\*)) #\L #\L)) +(test-ssre "(\\.\\d\\d[1-9]?)\\d+" (: ($ (: #\. numeric numeric (? (char-range #\1 #\9)))) (+ numeric))) +(test-ssre "(\\.\\d\\d((?=0)|\\d(?=\\d)))" ($ (: #\. numeric numeric ($ (or (look-ahead #\0) (: numeric (look-ahead numeric))))))) +(test-ssre "\\b(foo)\\s+(\\w+)" (: (or bow eow) ($ (: #\f #\o #\o)) (+ space) ($ (+ (or alnum #\_))))) +(test-ssre "foo(.*)bar" (: #\f #\o #\o ($ (* nonl)) #\b #\a #\r)) +(test-ssre "foo(.*?)bar" (: #\f #\o #\o ($ (*? nonl)) #\b #\a #\r)) +(test-ssre "(.*)(\\d*)" (: ($ (* nonl)) ($ (* numeric)))) +(test-ssre "(.*)(\\d+)" (: ($ (* nonl)) ($ (+ numeric)))) +(test-ssre "(.*?)(\\d*)" (: ($ (*? nonl)) ($ (* numeric)))) +(test-ssre "(.*?)(\\d+)" (: ($ (*? 
nonl)) ($ (+ numeric)))) +(test-ssre "(.*)(\\d+)$" (: ($ (* nonl)) ($ (+ numeric)) eos)) +(test-ssre "(.*?)(\\d+)$" (: ($ (*? nonl)) ($ (+ numeric)) eos)) +(test-ssre "(.*)\\b(\\d+)$" (: ($ (* nonl)) (or bow eow) ($ (+ numeric)) eos)) +(test-ssre "(.*\\D)(\\d+)$" (: ($ (: (* nonl) (~ numeric))) ($ (+ numeric)) eos)) +(test-ssre "^\\D*(?!123)" (: bos (* (~ numeric)) (neg-look-ahead (: #\1 #\2 #\3)))) +(test-ssre "^(\\D*)(?=\\d)(?!123)" (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) +(test-ssre "^[W-]46\\]" (: bos (or #\W #\-) #\4 #\6 #\])) +(test-ssre "^[W-\\]46]" (: bos (or (char-range #\W #\]) #\4 #\6))) +(test-ssre "word (?:[a-zA-Z0-9]+ ){0,10}otherword" (: #\w #\o #\r #\d #\space (** 0 10 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) +(test-ssre "word (?:[a-zA-Z0-9]+ ){0,300}otherword" (: #\w #\o #\r #\d #\space (** 0 300 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) +(test-ssre "^(a){0,0}" (: bos (= 0 ($ #\a)))) +(test-ssre "^(a){0,1}" (: bos (** 0 1 ($ #\a)))) +(test-ssre "^(a){0,2}" (: bos (** 0 2 ($ #\a)))) +(test-ssre "^(a){0,3}" (: bos (** 0 3 ($ #\a)))) +(test-ssre "^(a){0,}" (: bos (>= 0 ($ #\a)))) +(test-ssre "^(a){1,1}" (: bos (= 1 ($ #\a)))) +(test-ssre "^(a){1,2}" (: bos (** 1 2 ($ #\a)))) +(test-ssre "^(a){1,3}" (: bos (** 1 3 ($ #\a)))) +(test-ssre "^(a){1,}" (: bos (>= 1 ($ #\a)))) +(test-ssre ".*\\.gif" (: (* nonl) #\. #\g #\i #\f)) +(test-ssre ".{0,}\\.gif" (: (>= 0 nonl) #\. #\g #\i #\f)) +(test-ssre ".*\\.gif" (: (* nonl) #\. #\g #\i #\f)) +(test-ssre ".*\\.gif" (: (* nonl) #\. #\g #\i #\f)) +(test-ssre ".*\\.gif" (: (* nonl) #\. 
#\g #\i #\f)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre "(.*X|^B)" ($ (or (: (* nonl) #\X) (: bos #\B)))) +(test-ssre "^.*B" (: bos (* nonl) #\B)) +(test-ssre "(?m)^.*B" (: bol (* nonl) #\B)) +(test-ssre "^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" (: bos (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9))) +(test-ssre "^\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d" (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) +(test-ssre "^[\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d]" (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) +(test-ssre "^[abc]{12}" (: bos (= 12 (or #\a #\b #\c)))) +(test-ssre "^[a-c]{12}" (: bos (= 12 (char-range #\a #\c)))) +(test-ssre "^(a|b|c){12}" (: bos (= 12 ($ (or #\a #\b #\c))))) +(test-ssre "^[abcdefghijklmnopqrstuvwxy0123456789]" (: bos (or #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))) +(test-ssre "abcde{0,0}" (: #\a #\b #\c #\d (= 0 #\e))) +(test-ssre "ab[cd]{0,0}e" (: #\a #\b (= 0 (or #\c #\d)) #\e)) +(test-ssre "ab(c){0,0}d" (: #\a #\b (= 0 ($ #\c)) #\d)) +(test-ssre "a(b*)" (: #\a ($ (* #\b)))) +(test-ssre "ab\\d{0}e" (: #\a #\b (= 0 numeric) #\e)) +(test-ssre "\"([^\\\\\"]+|\\\\.)*\"" (: #\" (* ($ (or (+ (~ (or #\\ #\"))) (: #\\ nonl)))) #\")) +(test-ssre ".*?" (*? 
nonl)) +(test-ssre "\\b" (or bow eow)) +(test-ssre "\\b" (or bow eow)) +(test-ssre "a[^a]b" (: #\a (~ #\a) #\b)) +(test-ssre "a.b" (: #\a nonl #\b)) +(test-ssre "a[^a]b" (: #\a (~ #\a) #\b)) +(test-ssre "a.b" (: #\a nonl #\b)) +(test-ssre "^(b+?|a){1,2}?c" (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) +(test-ssre "^(b+|a){1,2}?c" (: bos (**? 1 2 ($ (or (+ #\b) #\a))) #\c)) +(test-ssre "(?!\\A)x" (: (neg-look-ahead bos) #\x)) +(test-ssre "(A|B)*?CD" (: (*? ($ (or #\A #\B))) #\C #\D)) +(test-ssre "(A|B)*CD" (: (* ($ (or #\A #\B))) #\C #\D)) +(test-ssre "(?= 0 #\b) #\b #\c)) +(test-ssre "ab+bc" (: #\a (+ #\b) #\b #\c)) +(test-ssre "ab{1,}bc" (: #\a (>= 1 #\b) #\b #\c)) +(test-ssre "ab+bc" (: #\a (+ #\b) #\b #\c)) +(test-ssre "ab{1,}bc" (: #\a (>= 1 #\b) #\b #\c)) +(test-ssre "ab{1,3}bc" (: #\a (** 1 3 #\b) #\b #\c)) +(test-ssre "ab{3,4}bc" (: #\a (** 3 4 #\b) #\b #\c)) +(test-ssre "ab{4,5}bc" (: #\a (** 4 5 #\b) #\b #\c)) +(test-ssre "ab?bc" (: #\a (? #\b) #\b #\c)) +(test-ssre "ab{0,1}bc" (: #\a (** 0 1 #\b) #\b #\c)) +(test-ssre "ab?bc" (: #\a (? #\b) #\b #\c)) +(test-ssre "ab?c" (: #\a (? 
#\b) #\c)) +(test-ssre "ab{0,1}c" (: #\a (** 0 1 #\b) #\c)) +(test-ssre "^abc$" (: bos #\a #\b #\c eos)) +(test-ssre "^abc" (: bos #\a #\b #\c)) +(test-ssre "^abc$" (: bos #\a #\b #\c eos)) +(test-ssre "abc$" (: #\a #\b #\c eos)) +(test-ssre "^" bos) +(test-ssre "$" eos) +(test-ssre "a.c" (: #\a nonl #\c)) +(test-ssre "a.*c" (: #\a (* nonl) #\c)) +(test-ssre "a[bc]d" (: #\a (or #\b #\c) #\d)) +(test-ssre "a[b-d]e" (: #\a (char-range #\b #\d) #\e)) +(test-ssre "a[b-d]" (: #\a (char-range #\b #\d))) +(test-ssre "a[-b]" (: #\a (or #\- #\b))) +(test-ssre "a[b-]" (: #\a (or #\b #\-))) +(test-ssre "a\\]" (: #\a #\])) +(test-ssre "a[]]b" (: #\a #\] #\b)) +(test-ssre "a[^bc]d" (: #\a (~ (or #\b #\c)) #\d)) +(test-ssre "a[^-b]c" (: #\a (~ (or #\- #\b)) #\c)) +(test-ssre "a[^]b]c" (: #\a (~ (or #\] #\b)) #\c)) +(test-ssre "\\ba\\b" (: (or bow eow) #\a (or bow eow))) +(test-ssre "\\by\\b" (: (or bow eow) #\y (or bow eow))) +(test-ssre "\\Ba\\B" (: nwb #\a nwb)) +(test-ssre "\\By\\b" (: nwb #\y (or bow eow))) +(test-ssre "\\by\\B" (: (or bow eow) #\y nwb)) +(test-ssre "\\By\\B" (: nwb #\y nwb)) +(test-ssre "\\w" (or alnum #\_)) +(test-ssre "\\W" (~ (or alnum #\_))) +(test-ssre "a\\sb" (: #\a space #\b)) +(test-ssre "a\\Sb" (: #\a (~ space) #\b)) +(test-ssre "\\d" numeric) +(test-ssre "\\D" (~ numeric)) +(test-ssre "ab|cd" (or (: #\a #\b) (: #\c #\d))) +(test-ssre "()ef" (: ($ (:)) #\e #\f)) +(test-ssre "$b" (: eos #\b)) +(test-ssre "a\\(b" (: #\a #\( #\b)) +(test-ssre "a\\(*b" (: #\a (* #\() #\b)) +(test-ssre "a\\\\b" (: #\a #\\ #\b)) +(test-ssre "((a))" ($ ($ #\a))) +(test-ssre "(a)b(c)" (: ($ #\a) #\b ($ #\c))) +(test-ssre "a+b+c" (: (+ #\a) (+ #\b) #\c)) +(test-ssre "a{1,}b{1,}c" (: (>= 1 #\a) (>= 1 #\b) #\c)) +(test-ssre "a.+?c" (: #\a (**? 
1 #f nonl) #\c)) +(test-ssre "(a+|b)*" (* ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b){0,}" (>= 0 ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b)+" (+ ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b){1,}" (>= 1 ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b)?" (? ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b){0,1}" (** 0 1 ($ (or (+ #\a) #\b)))) +(test-ssre "[^ab]*" (* (~ (or #\a #\b)))) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "a*" (* #\a)) +(test-ssre "([abc])*d" (: (* ($ (or #\a #\b #\c))) #\d)) +(test-ssre "([abc])*bcd" (: (* ($ (or #\a #\b #\c))) #\b #\c #\d)) +(test-ssre "a|b|c|d|e" (or #\a #\b #\c #\d #\e)) +(test-ssre "(a|b|c|d|e)f" (: ($ (or #\a #\b #\c #\d #\e)) #\f)) +(test-ssre "abcd*efg" (: #\a #\b #\c (* #\d) #\e #\f #\g)) +(test-ssre "ab*" (: #\a (* #\b))) +(test-ssre "(ab|cd)e" (: ($ (or (: #\a #\b) (: #\c #\d))) #\e)) +(test-ssre "[abhgefdc]ij" (: (or #\a #\b #\h #\g #\e #\f #\d #\c) #\i #\j)) +(test-ssre "^(ab|cd)e" (: bos ($ (or (: #\a #\b) (: #\c #\d))) #\e)) +(test-ssre "(abc|)ef" (: ($ (or (: #\a #\b #\c) (:))) #\e #\f)) +(test-ssre "(a|b)c*d" (: ($ (or #\a #\b)) (* #\c) #\d)) +(test-ssre "(ab|ab*)bc" (: ($ (or (: #\a #\b) (: #\a (* #\b)))) #\b #\c)) +(test-ssre "a([bc]*)c*" (: #\a ($ (* (or #\b #\c))) (* #\c))) +(test-ssre "a([bc]*)(c*d)" (: #\a ($ (* (or #\b #\c))) ($ (: (* #\c) #\d)))) +(test-ssre "a([bc]+)(c*d)" (: #\a ($ (+ (or #\b #\c))) ($ (: (* #\c) #\d)))) +(test-ssre "a([bc]*)(c+d)" (: #\a ($ (* (or #\b #\c))) ($ (: (+ #\c) #\d)))) +(test-ssre "a[bcd]*dcdcde" (: #\a (* (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) +(test-ssre "a[bcd]+dcdcde" (: #\a (+ (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) +(test-ssre "(ab|a)b*c" (: ($ (or (: #\a #\b) #\a)) (* #\b) #\c)) +(test-ssre "((a)(b)c)(d)" (: ($ (: ($ #\a) ($ #\b) #\c)) ($ #\d))) +(test-ssre "[a-zA-Z_][a-zA-Z0-9_]*" (: (or (char-range #\a #\z) (char-range #\A #\Z) #\_) (* (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9) #\_)))) +(test-ssre "^a(bc+|b[eh])g|.h$" (or (: bos #\a ($ (or (: 
#\b (+ #\c)) (: #\b (or #\e #\h)))) #\g) (: nonl #\h eos))) +(test-ssre "(bc+d$|ef*g.|h?i(j|k))" ($ (or (: #\b (+ #\c) #\d eos) (: #\e (* #\f) #\g nonl) (: (? #\h) #\i ($ (or #\j #\k)))))) +(test-ssre "((((((((((a))))))))))" ($ ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a))))))))))) +(test-ssre "(((((((((a)))))))))" ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a)))))))))) +(test-ssre "multiple words of text" (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s #\space #\o #\f #\space #\t #\e #\x #\t)) +(test-ssre "multiple words" (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s)) +(test-ssre "(.*)c(.*)" (: ($ (* nonl)) #\c ($ (* nonl)))) +(test-ssre "\\((.*), (.*)\\)" (: #\( ($ (* nonl)) #\, #\space ($ (* nonl)) #\))) +(test-ssre "[k]" #\k) +(test-ssre "abcd" (: #\a #\b #\c #\d)) +(test-ssre "a(bc)d" (: #\a ($ (: #\b #\c)) #\d)) +(test-ssre "a[-]?c" (: #\a (? #\-) #\c)) +(test-ssre "a(?!b)." (: #\a (neg-look-ahead #\b) nonl)) +(test-ssre "a(?=d)." (: #\a (look-ahead #\d) nonl)) +(test-ssre "a(?=c|d)." (: #\a (look-ahead (or #\c #\d)) nonl)) +(test-ssre "a(?:b|c|d)(.)" (: #\a (or #\b #\c #\d) ($ nonl))) +(test-ssre "a(?:b|c|d)*(.)" (: #\a (* (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d)+?(.)" (: #\a (**? 1 #f (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d)+(.)" (: #\a (+ (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){2}(.)" (: #\a (= 2 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){4,5}(.)" (: #\a (** 4 5 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){4,5}?(.)" (: #\a (**? 4 5 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){6,7}(.)" (: #\a (** 6 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){6,7}?(.)" (: #\a (**? 6 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,6}(.)" (: #\a (** 5 6 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,6}?(.)" (: #\a (**? 5 6 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,7}(.)" (: #\a (** 5 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,7}?(.)" (: #\a (**? 
5 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|(c|e){1,2}?|d)+?(.)" (: #\a (**? 1 #f (or #\b (**? 1 2 ($ (or #\c #\e))) #\d)) ($ nonl))) +(test-ssre "^(.+)?B" (: bos (? ($ (+ nonl))) #\B)) +(test-ssre "^([^a-z])|(\\^)$" (or (: bos ($ (~ (char-range #\a #\z)))) (: ($ #\^) eos))) +(test-ssre "^[<>]&" (: bos (or #\< #\>) #\&)) +(test-ssre "(?<=a)b" (: (look-behind #\a) #\b)) +(test-ssre "(?a+)ab" (: ($ (: #\> (+ #\a))) #\a #\b)) +(test-ssre "b\\z" (: #\b eos)) +(test-ssre "(?<=\\d{3}(?!999))foo" (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)))) #\f #\o #\o)) +(test-ssre "(?<=(?!...999)\\d{3})foo" (: (look-behind (: (neg-look-ahead (: nonl nonl nonl #\9 #\9 #\9)) (= 3 numeric))) #\f #\o #\o)) +(test-ssre "(?<=\\d{3}(?!999)...)foo" (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)) nonl nonl nonl)) #\f #\o #\o)) +(test-ssre "(?<=\\d{3}...)(?= 0 #\b) (or alnum #\_))) +(test-ssre "a*\\d*\\w" (: (* #\a) (* numeric) (or alnum #\_))) +(test-ssre "(?x)a*b *\\w" (: (* #\a) (* #\b) (or alnum #\_))) +(test-ssre "(?x)a* b *\\w" (: (* #\a) (* #\b) (or alnum #\_))) +(test-ssre "\\z(?= 2 ($ (** 2 3 #\a)))) #\a)) +(test-ssre "(?=C)" (look-ahead #\C)) +(test-ssre "(?:a(? (?')|(?\")) |b(? (?')|(?\")) ) (\\k[a-z]+|[0-9]+)" (: (or (: #\a (-> quote (or (: #\space (-> apostrophe #\')) (-> realquote #\"))) #\space) (: #\b (-> quote (or (: #\space (-> apostrophe #\')) (-> realquote #\"))) #\space)) #\space ($ (or (: (backref quote) (+ (char-range #\a #\z))) (+ (char-range #\0 #\9)))))) +(test-ssre "^(a){2,}+(\\w)" (: bos (+ (>= 2 ($ #\a))) ($ (or alnum #\_)))) +(test-ssre "^(?:a){2,}+(\\w)" (: bos (+ (>= 2 #\a)) ($ (or alnum #\_)))) +(test-ssre "\\A.*?(a|bc)" (: bos (*? nonl) ($ (or #\a (: #\b #\c))))) +(test-ssre "\\A.*?(?:a|bc|d)" (: bos (*? nonl) (or #\a (: #\b #\c) #\d))) +(test-ssre "(?:.*?a)(?<=ba)" (: (*? 
nonl) #\a (look-behind (: #\b #\a)))) +(test-ssre "a(?=bc).|abd" (or (: #\a (look-ahead (: #\b #\c)) nonl) (: #\a #\b #\d))) +(test-ssre "\\A.*?(?:a|bc)" (: bos (*? nonl) (or #\a (: #\b #\c)))) +(test-ssre "^\\d*\\w{4}" (: bos (* numeric) (= 4 (or alnum #\_)))) +(test-ssre "^[^b]*\\w{4}" (: bos (* (~ #\b)) (= 4 (or alnum #\_)))) +(test-ssre "^a*\\w{4}" (: bos (* #\a) (= 4 (or alnum #\_)))) +(test-ssre "(?:(?foo)|(?bar))\\k" (: (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n))) +(test-ssre "(?A)(?:(?foo)|(?bar))\\k" (: (-> n #\A) (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n))) +(test-ssre "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$" (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos)) +(test-ssre "(?:x|(?:(xx|yy)+|x|x|x|x|x)|a|a|a)bc" (: (or #\x (or (+ ($ (or (: #\x #\x) (: #\y #\y)))) #\x #\x #\x #\x #\x) #\a #\a #\a) #\b #\c)) +(test-ssre "\\sabc" (: space #\a #\b #\c)) +(test-ssre "Z*(|d*){216}" (: (* #\Z) (= 216 ($ (or (:) (* #\d)))))) +(test-ssre "(?<=a(B){0}c)X" (: (look-behind (: #\a (= 0 ($ #\B)) #\c)) #\X)) +(test-ssre "a+(?:|b)a" (: (+ #\a) (or (:) #\b) #\a)) +(test-ssre "X?(R||){3335}" (: (? #\X) (= 3335 ($ (or #\R (:) (:)))))) +(test-ssre "(?!(b))c|b" (or (: (neg-look-ahead ($ #\b)) #\c) #\b)) +(test-ssre "(?=(b))b|c" (or (: (look-ahead ($ #\b)) #\b) #\c)) +(test-ssre "<(?x:[a b])>" (: #\< (or #\a #\space #\b) #\>)) +(test-ssre "<(?:[a b])>" (: #\< (or #\a #\space #\b) #\>)) +(test-ssre "<(?xxx:[a b])>" (: #\< (or #\a #\space #\b) #\>)) +(test-ssre "<(?-x:[a b])>" (: #\< (or #\a #\space #\b) #\>)) +(test-ssre "[[:digit:]-]+" (+ (or numeric #\-))) +(test-ssre "(?<=(?=.)?)" (look-behind (? (look-ahead nonl)))) +(test-ssre "(?<=(?=.){4,5})" (look-behind (** 4 5 (look-ahead nonl)))) +(test-ssre "(?<=(?=.){4,5}x)" (look-behind (: (** 4 5 (look-ahead nonl)) #\x))) +(test-ssre " (? \\w+ )* \\. 
" (: #\space #\space #\space (* (-> word (: #\space (+ (or alnum #\_)) #\space))) #\space #\space #\space #\space #\. #\space #\space #\space)) +(test-ssre "(?<=(?=.(?<=x)))" (look-behind (look-ahead (: nonl (look-behind #\x))))) +(test-ssre "(?<=(?=(?<=a)))b" (: (look-behind (look-ahead (look-behind #\a))) #\b)) +(test-ssre "(?<=ab?c)..." (: (look-behind (: #\a (? #\b) #\c)) nonl nonl nonl)) +(test-ssre "(?<=PQR|ab?c)..." (: (look-behind (or (: #\P #\Q #\R) (: #\a (? #\b) #\c))) nonl nonl nonl)) +(test-ssre "(?<=ab?c|PQR)..." (: (look-behind (or (: #\a (? #\b) #\c) (: #\P #\Q #\R))) nonl nonl nonl)) +(test-ssre "(?<=PQ|ab?c)..." (: (look-behind (or (: #\P #\Q) (: #\a (? #\b) #\c))) nonl nonl nonl)) +(test-ssre "(?<=ab?c|PQ)..." (: (look-behind (or (: #\a (? #\b) #\c) (: #\P #\Q))) nonl nonl nonl)) +(test-ssre "(?<=a(b?c|d?e?e)f)X." (: (look-behind (: #\a ($ (or (: (? #\b) #\c) (: (? #\d) (? #\e) #\e))) #\f)) #\X nonl)) +(test-ssre "(?= 5 (char-range #\a #\z)) #\b) #\x)) +(test-ssre "[a-z]{1,6}?s|x" (or (: (**? 1 6 (char-range #\a #\z)) #\s) #\x)) +(test-ssre "[@]" #\@) +(test-ssre "@" #\@) +(test-ssre "@@@xxx" (: #\@ #\@ #\@ #\x #\x #\x)) +(test-ssre "badutf" (: #\b #\a #\d #\u #\t #\f)) +(test-ssre "badutf" (: #\b #\a #\d #\u #\t #\f)) +(test-ssre "shortutf" (: #\s #\h #\o #\r #\t #\u #\t #\f)) +(test-ssre "anything" (: #\a #\n #\y #\t #\h #\i #\n #\g)) +(test-ssre "badutf" (: #\b #\a #\d #\u #\t #\f)) +(test-ssre "(?<=x)badutf" (: (look-behind #\x) #\b #\a #\d #\u #\t #\f)) +(test-ssre "(?<=xx)badutf" (: (look-behind (: #\x #\x)) #\b #\a #\d #\u #\t #\f)) +(test-ssre "(?<=xxxx)badutf" (: (look-behind (: #\x #\x #\x #\x)) #\b #\a #\d #\u #\t #\f)) +(test-ssre "X" #\X) +(test-ssre "a+" (+ #\a)) +(test-ssre "A" #\A) +(test-ssre "x" #\x) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "X" #\X) +(test-ssre "(?<=.)X" (: (look-behind nonl) #\X)) +(test-ssre "a+" (+ #\a)) +(test-ssre "a" #\a) +(test-ssre "." 
nonl) +(test-ssre "s" #\s) +(test-ssre "[^s]" (~ #\s)) +(test-ssre "a(?:.)*?a" (: #\a (*? nonl) #\a)) +(test-ssre "(?<=pqr)abc(?=xyz)" (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z)))) +(test-ssre "a\\b" (: #\a (or bow eow))) +(test-ssre "abc(?=abcde)(?=ab)" (: #\a #\b #\c (look-ahead (: #\a #\b #\c #\d #\e)) (look-ahead (: #\a #\b)))) +(test-ssre "(?<=abc)123" (: (look-behind (: #\a #\b #\c)) #\1 #\2 #\3)) +(test-ssre "\\babc\\b" (: (or bow eow) #\a #\b #\c (or bow eow))) +(test-ssre "(?<=abc)def" (: (look-behind (: #\a #\b #\c)) #\d #\e #\f)) +(test-ssre "abc(?<=bc)def" (: #\a #\b #\c (look-behind (: #\b #\c)) #\d #\e #\f)) +(test-ssre "(?<=ab)cdef" (: (look-behind (: #\a #\b)) #\c #\d #\e #\f)) +(test-ssre "b(?tom|bon)-\\k" (: (-> A (or (: #\t #\o #\m) (: #\b #\o #\n))) #\- (backref A))) +(test-ssre "Xa{2,4}b" (: #\X (** 2 4 #\a) #\b)) +(test-ssre "Xa{2,4}?b" (: #\X (**? 2 4 #\a) #\b)) +(test-ssre "Xa{2,4}+b" (: #\X (+ (** 2 4 #\a)) #\b)) +(test-ssre "X\\d{2,4}b" (: #\X (** 2 4 numeric) #\b)) +(test-ssre "X\\d{2,4}?b" (: #\X (**? 2 4 numeric) #\b)) +(test-ssre "X\\d{2,4}+b" (: #\X (+ (** 2 4 numeric)) #\b)) +(test-ssre "X\\D{2,4}b" (: #\X (** 2 4 (~ numeric)) #\b)) +(test-ssre "X\\D{2,4}?b" (: #\X (**? 2 4 (~ numeric)) #\b)) +(test-ssre "X\\D{2,4}+b" (: #\X (+ (** 2 4 (~ numeric))) #\b)) +(test-ssre "X[abc]{2,4}b" (: #\X (** 2 4 (or #\a #\b #\c)) #\b)) +(test-ssre "X[abc]{2,4}?b" (: #\X (**? 2 4 (or #\a #\b #\c)) #\b)) +(test-ssre "X[abc]{2,4}+b" (: #\X (+ (** 2 4 (or #\a #\b #\c))) #\b)) +(test-ssre "X[^a]{2,4}b" (: #\X (** 2 4 (~ #\a)) #\b)) +(test-ssre "X[^a]{2,4}?b" (: #\X (**? 2 4 (~ #\a)) #\b)) +(test-ssre "X[^a]{2,4}+b" (: #\X (+ (** 2 4 (~ #\a))) #\b)) +(test-ssre "Z(?!)" (: #\Z (neg-look-ahead (:)))) +(test-ssre "dog(sbody)?" (: #\d #\o #\g (? ($ (: #\s #\b #\o #\d #\y))))) +(test-ssre "dog(sbody)??" (: #\d #\o #\g (?? 
($ (: #\s #\b #\o #\d #\y))))) +(test-ssre "dog|dogsbody" (or (: #\d #\o #\g) (: #\d #\o #\g #\s #\b #\o #\d #\y))) +(test-ssre "dogsbody|dog" (or (: #\d #\o #\g #\s #\b #\o #\d #\y) (: #\d #\o #\g))) +(test-ssre "\\bthe cat\\b" (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow))) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "(?<=abc)123" (: (look-behind (: #\a #\b #\c)) #\1 #\2 #\3)) +(test-ssre "\\babc\\b" (: (or bow eow) #\a #\b #\c (or bow eow))) +(test-ssre "a?b?" (: (? #\a) (? #\b))) +(test-ssre "^a?b?" (: bos (? #\a) (? #\b))) +(test-ssre "abcd*" (: #\a #\b #\c (* #\d))) +(test-ssre "abc\\d*" (: #\a #\b #\c (* numeric))) +(test-ssre "abc[de]*" (: #\a #\b #\c (* (or #\d #\e)))) +(test-ssre "(?<=abc)def" (: (look-behind (: #\a #\b #\c)) #\d #\e #\f)) +(test-ssre "abc$" (: #\a #\b #\c eos)) +(test-ssre "abc$" (: #\a #\b #\c eos)) +(test-ssre "abc\\z" (: #\a #\b #\c eos)) +(test-ssre "abc\\b" (: #\a #\b #\c (or bow eow))) +(test-ssre "abc\\B" (: #\a #\b #\c nwb)) +(test-ssre ".+" (+ nonl)) +(test-ssre "(?<=(abc)+)X" (: (look-behind (+ ($ (: #\a #\b #\c)))) #\X)) +(test-ssre "(a)b|ac" (or (: ($ #\a) #\b) (: #\a #\c))) +(test-ssre "(a)(b)x|abc" (or (: ($ #\a) ($ #\b) #\x) (: #\a #\b #\c))) +(test-ssre "(?:(foo)|(bar)|(baz))X" (: (or ($ (: #\f #\o #\o)) ($ (: #\b #\a #\r)) ($ (: #\b #\a #\z))) #\X)) +(test-ssre "(ab)x|ab" (or (: ($ (: #\a #\b)) #\x) (: #\a #\b))) +(test-ssre "(((((a)))))" ($ ($ ($ ($ ($ #\a)))))) +(test-ssre "a*?b*?" (: (*? #\a) (*? #\b))) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "a(b)c" (: #\a ($ #\b) #\c)) +(test-ssre "(a)(b)|(c)" (or (: ($ #\a) ($ #\b)) ($ #\c))) +(test-ssre "(?a)|(?b)" (or (-> A #\a) (-> A #\b))) +(test-ssre "a(b)c(d)" (: #\a ($ #\b) #\c ($ #\d))) +(test-ssre "^abc" (: bos #\a #\b #\c)) +(test-ssre ".*\\d" (: (* nonl) numeric)) +(test-ssre "(abc)*" (* ($ (: #\a #\b #\c)))) +(test-ssre "^" bos) +(test-ssre "(?:ab)?(?:ab)(?:ab)" (: (? 
(: #\a #\b)) (: #\a #\b) (: #\a #\b))) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "(abcd)" ($ (: #\a #\b #\c #\d))) +(test-ssre "abcd" (: #\a #\b #\c #\d)) +(test-ssre "a(b)c" (: #\a ($ #\b) #\c)) +(test-ssre "0b 28 3f 2d 78 29 3a" (: #\0 #\b #\space #\2 #\8 #\space #\3 #\f #\space #\2 #\d #\space #\7 #\8 #\space #\2 #\9 #\space #\3 #\a)) +(test-ssre "a|(b)c" (or #\a (: ($ #\b) #\c))) +(test-ssre "efg" (: #\e #\f #\g)) +(test-ssre "eff" (: #\e #\f #\f)) +(test-ssre "effg" (: #\e #\f #\f #\g)) +(test-ssre "aaa" (: #\a #\a #\a)) +(test-ssre "(?)" (: #\[ ($ (:)) (= 65535 #\]) (-> A (:)))) +(test-ssre "(?<=(?=.(?<=x)))" (look-behind (look-ahead (: nonl (look-behind #\x))))) +(test-ssre "\\z" eos) +(test-ssre "\\Z" (: (? #\newline) eos)) +(test-ssre "(?![ab]).*" (: (neg-look-ahead (or #\a #\b)) (* nonl))) +(test-ssre "abcd" (: #\a #\b #\c #\d)) +(test-ssre "12345(?<=\\d{1,256})X" (: #\1 #\2 #\3 #\4 #\5 (look-behind (** 1 256 numeric)) #\X)) +(test-ssre "(?foo)|(?bar))\\k" (: (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n))) +(test-ssre "a?b[]xy]*c" (: (? #\a) #\b (* (or #\] #\x #\y)) #\c)) +(test-ssre "f*" (* #\f)) +(test-ssre "foo\\*" (: #\f #\o #\o #\*)) +(test-ssre "foo\\*bar" (: #\f #\o #\o #\* #\b #\a #\r)) +(test-ssre "f\\\\oo" (: #\f #\\ #\o #\o)) +(test-ssre "[ten]" (or #\t #\e #\n)) +(test-ssre "t[a-g]n" (: #\t (char-range #\a #\g) #\n)) +(test-ssre "a[]]b" (: #\a #\] #\b)) +(test-ssre "a[]a-]b" (: #\a (or #\] #\a #\-) #\b)) +(test-ssre "a[]-]b" (: #\a (or #\] #\-) #\b)) +(test-ssre "a[]a-z]b" (: #\a (or #\] (char-range #\a #\z)) #\b)) +(test-ssre "\\]" #\]) +(test-ssre "t[!a-g]n" (: #\t (or #\! (char-range #\a #\g)) #\n)) +(test-ssre "A[+-0]B" (: #\A (char-range #\+ #\0) #\B)) +(test-ssre "a[--0]z" (: #\a (char-range #\- #\0) #\z)) +(test-ssre "a[[:digit:].]z" (: #\a (or numeric #\.) 
#\z)) +(test-ssre "A\\B\\\\C\\D" (: #\A nwb #\\ #\C (~ numeric))) +(test-ssre "a*b" (: (* #\a) #\b)) +(test-ssre "<[]bc]>" (: #\< (or #\] #\b #\c) #\>)) +(test-ssre "<[^]bc]>" (: #\< (~ (or #\] #\b #\c)) #\>)) +(test-ssre "a*b+c\\+[def](ab)\\(cd\\)" (: (* #\a) (+ #\b) #\c #\+ (or #\d #\e #\f) ($ (: #\a #\b)) #\( #\c #\d #\))) +(test-ssre "how.to how\\.to" (: #\h #\o #\w nonl #\t #\o #\space #\h #\o #\w #\. #\t #\o)) +(test-ssre "^how to \\^how to" (: bos #\h #\o #\w #\space #\t #\o #\space #\^ #\h #\o #\w #\space #\t #\o)) +(test-ssre "^b\\(c^d\\)\\(^e^f\\)" (: bos #\b #\( #\c bos #\d #\) #\( bos #\e bos #\f #\))) +(test-ssre "\\[()\\]{65535}()" (: #\[ ($ (:)) (= 65535 #\]) ($ (:)))) +(test-ssre "^A" (: bos #\A)) +(test-ssre "^\\w+" (: bos (+ (or alnum #\_)))) +(test-ssre "(.+)\\b(.+)" (: ($ (+ nonl)) (or bow eow) ($ (+ nonl)))) +(test-ssre "\\W+" (+ (~ (or alnum #\_)))) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "a.b" (: #\a nonl #\b)) +(test-ssre "a(.{3})b" (: #\a ($ (= 3 nonl)) #\b)) +(test-ssre "a(.*?)(.)" (: #\a ($ (*? nonl)) ($ nonl))) +(test-ssre "a(.*?)(.)" (: #\a ($ (*? nonl)) ($ nonl))) +(test-ssre "a(.*)(.)" (: #\a ($ (* nonl)) ($ nonl))) +(test-ssre "a(.*)(.)" (: #\a ($ (* nonl)) ($ nonl))) +(test-ssre "a(.)(.)" (: #\a ($ nonl) ($ nonl))) +(test-ssre "a(.)(.)" (: #\a ($ nonl) ($ nonl))) +(test-ssre "a(.?)(.)" (: #\a ($ (? nonl)) ($ nonl))) +(test-ssre "a(.?)(.)" (: #\a ($ (? nonl)) ($ nonl))) +(test-ssre "a(.??)(.)" (: #\a ($ (?? nonl)) ($ nonl))) +(test-ssre "a(.??)(.)" (: #\a ($ (?? nonl)) ($ nonl))) +(test-ssre "a(.{3})b" (: #\a ($ (= 3 nonl)) #\b)) +(test-ssre "a(.{3,})b" (: #\a ($ (>= 3 nonl)) #\b)) +(test-ssre "a(.{3,}?)b" (: #\a ($ (**? 3 #f nonl)) #\b)) +(test-ssre "a(.{3,5})b" (: #\a ($ (** 3 5 nonl)) #\b)) +(test-ssre "a(.{3,5}?)b" (: #\a ($ (**? 
3 5 nonl)) #\b)) +(test-ssre "(?<=aXb)cd" (: (look-behind (: #\a #\X #\b)) #\c #\d)) +(test-ssre "(?<=(.))X" (: (look-behind ($ nonl)) #\X)) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "^[^a]{2}" (: bos (= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}" (: bos (>= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}?" (: bos (**? 2 #f (~ #\a)))) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "^[^a]{2}" (: bos (= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}" (: bos (>= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}?" (: bos (**? 2 #f (~ #\a)))) +(test-ssre "\\D*" (* (~ numeric))) +(test-ssre "\\D*" (* (~ numeric))) +(test-ssre "\\D" (~ numeric)) +(test-ssre ">\\S" (: #\> (~ space))) +(test-ssre "\\d" numeric) +(test-ssre "\\s" space) +(test-ssre "\\D+" (+ (~ numeric))) +(test-ssre "\\D{2,3}" (** 2 3 (~ numeric))) +(test-ssre "\\D{2,3}?" (**? 2 3 (~ numeric))) +(test-ssre "\\d+" (+ numeric)) +(test-ssre "\\d{2,3}" (** 2 3 numeric)) +(test-ssre "\\d{2,3}?" (**? 2 3 numeric)) +(test-ssre "\\S+" (+ (~ space))) +(test-ssre "\\S{2,3}" (** 2 3 (~ space))) +(test-ssre "\\S{2,3}?" (**? 2 3 (~ space))) +(test-ssre ">\\s+<" (: #\> (+ space) #\<)) +(test-ssre ">\\s{2,3}<" (: #\> (** 2 3 space) #\<)) +(test-ssre ">\\s{2,3}?<" (: #\> (**? 2 3 space) #\<)) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\w{2,3}" (** 2 3 (or alnum #\_))) +(test-ssre "\\w{2,3}?" (**? 2 3 (or alnum #\_))) +(test-ssre "\\W+" (+ (~ (or alnum #\_)))) +(test-ssre "\\W{2,3}" (** 2 3 (~ (or alnum #\_)))) +(test-ssre "\\W{2,3}?" (**? 
2 3 (~ (or alnum #\_)))) +(test-ssre "^[ac]*b" (: bos (* (or #\a #\c)) #\b)) +(test-ssre "^[^x]*b" (: bos (* (~ #\x)) #\b)) +(test-ssre "^[^x]*b" (: bos (* (~ #\x)) #\b)) +(test-ssre "^\\d*b" (: bos (* numeric) #\b)) +(test-ssre "(|a)" ($ (or (:) #\a))) +(test-ssre "\\S\\S" (: (~ space) (~ space))) +(test-ssre "\\S{2}" (= 2 (~ space))) +(test-ssre "\\W\\W" (: (~ (or alnum #\_)) (~ (or alnum #\_)))) +(test-ssre "\\W{2}" (= 2 (~ (or alnum #\_)))) +(test-ssre "\\S" (~ space)) +(test-ssre "\\D" (~ numeric)) +(test-ssre "\\W" (~ (or alnum #\_))) +(test-ssre ".[^\\S\n]." (: nonl (~ (or (~ space) #\newline)) nonl)) +(test-ssre "^[^d]*?$" (: bos (*? (~ #\d)) eos)) +(test-ssre "^[^d]*?$" (: bos (*? (~ #\d)) eos)) +(test-ssre "^[^d]*?$" (: bos (*? (~ #\d)) eos)) +(test-ssre "A*" (* #\A)) +(test-ssre "." nonl) +(test-ssre "^\\d*\\w{4}" (: bos (* numeric) (= 4 (or alnum #\_)))) +(test-ssre "^[^b]*\\w{4}" (: bos (* (~ #\b)) (= 4 (or alnum #\_)))) +(test-ssre "^[^b]*\\w{4}" (: bos (* (~ #\b)) (= 4 (or alnum #\_)))) +(test-ssre "^.\\B.\\B." (: bos nonl nwb nonl nwb nonl)) +(test-ssre "\\D+" (+ (~ numeric))) +(test-ssre "^\\w+" (: bos (+ (or alnum #\_)))) +(test-ssre "^\\d+" (: bos (+ numeric))) +(test-ssre "^>\\s+" (: bos #\> (+ space))) +(test-ssre "^A\\s+Z" (: bos #\A (+ space) #\Z)) +(test-ssre "[RST]+" (+ (or #\R #\S #\T))) +(test-ssre "[R-T]+" (+ (char-range #\R #\T))) +(test-ssre "[q-u]+" (+ (char-range #\q #\u))) +(test-ssre "^s?c" (: bos (? #\s) #\c)) +(test-ssre "[A-`]" (char-range #\A #\`)) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\b.+?\\b" (: (or bow eow) (**? 1 #f nonl) (or bow eow))) +(test-ssre "caf\\B.+?\\B" (: #\c #\a #\f nwb (**? 1 #f nonl) nwb)) +(test-ssre "c3 b1" (: #\c #\3 #\space #\b #\1)) +(test-ssre "^A\\s+Z" (: bos #\A (+ space) #\Z)) +(test-ssre "\\W" (~ (or alnum #\_))) +(test-ssre "\\w" (or alnum #\_)) +(test-ssre "Xa{2,4}b" (: #\X (** 2 4 #\a) #\b)) +(test-ssre "Xa{2,4}?b" (: #\X (**? 
2 4 #\a) #\b)) +(test-ssre "Xa{2,4}+b" (: #\X (+ (** 2 4 #\a)) #\b)) +(test-ssre "X\\d{2,4}b" (: #\X (** 2 4 numeric) #\b)) +(test-ssre "X\\d{2,4}?b" (: #\X (**? 2 4 numeric) #\b)) +(test-ssre "X\\d{2,4}+b" (: #\X (+ (** 2 4 numeric)) #\b)) +(test-ssre "X\\D{2,4}b" (: #\X (** 2 4 (~ numeric)) #\b)) +(test-ssre "X\\D{2,4}?b" (: #\X (**? 2 4 (~ numeric)) #\b)) +(test-ssre "X\\D{2,4}+b" (: #\X (+ (** 2 4 (~ numeric))) #\b)) +(test-ssre "X\\D{2,4}b" (: #\X (** 2 4 (~ numeric)) #\b)) +(test-ssre "X\\D{2,4}?b" (: #\X (**? 2 4 (~ numeric)) #\b)) +(test-ssre "X\\D{2,4}+b" (: #\X (+ (** 2 4 (~ numeric))) #\b)) +(test-ssre "X[abc]{2,4}b" (: #\X (** 2 4 (or #\a #\b #\c)) #\b)) +(test-ssre "X[abc]{2,4}?b" (: #\X (**? 2 4 (or #\a #\b #\c)) #\b)) +(test-ssre "X[abc]{2,4}+b" (: #\X (+ (** 2 4 (or #\a #\b #\c))) #\b)) +(test-ssre "X[^a]{2,4}b" (: #\X (** 2 4 (~ #\a)) #\b)) +(test-ssre "X[^a]{2,4}?b" (: #\X (**? 2 4 (~ #\a)) #\b)) +(test-ssre "X[^a]{2,4}+b" (: #\X (+ (** 2 4 (~ #\a))) #\b)) +(test-ssre "X[^a]{2,4}b" (: #\X (** 2 4 (~ #\a)) #\b)) +(test-ssre "X[^a]{2,4}?b" (: #\X (**? 2 4 (~ #\a)) #\b)) +(test-ssre "X[^a]{2,4}+b" (: #\X (+ (** 2 4 (~ #\a))) #\b)) +(test-ssre "\\bthe cat\\b" (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow))) +(test-ssre "abcd*" (: #\a #\b #\c (* #\d))) +(test-ssre "abcd*" (: #\a #\b #\c (* #\d))) +(test-ssre "abc\\d*" (: #\a #\b #\c (* numeric))) +(test-ssre "abc[de]*" (: #\a #\b #\c (* (or #\d #\e)))) +(test-ssre "X\\W{3}X" (: #\X (= 3 (~ (or alnum #\_))) #\X)) +(test-ssre "f.*" (: #\f (* nonl))) +(test-ssre "f.*" (: #\f (* nonl))) +(test-ssre "f.*" (: #\f (* nonl))) +(test-ssre "f.*" (: #\f (* nonl))) +(test-ssre "(?ss)|(?kk)) \\k" (: (or (-> A (: #\s #\s)) (-> A (: #\k #\k))) #\space (backref A))) +(test-ssre "(?:(?s)|(?k)) \\k{3,}!" 
(: (or (-> A #\s) (-> A #\k)) #\space (>= 3 (backref A)) #\!)) +(test-ssre "i" #\i) +(test-ssre "I" #\I) +(test-ssre "[i]" #\i) +(test-ssre "[^i]" (~ #\i)) +(test-ssre "[zi]" (or #\z #\i)) +(test-ssre "[iI]" (or #\i #\I)) +(test-ssre "\\d+" (+ numeric)) +(test-ssre "\\d+" (+ numeric)) +(test-ssre ">\\s+<" (: #\> (+ space) #\<)) +(test-ssre ">\\s+<" (: #\> (+ space) #\<)) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\bABC\\b" (: (or bow eow) #\A #\B #\C (or bow eow))) +(test-ssre "\\bABC\\b" (: (or bow eow) #\A #\B #\C (or bow eow))) +(test-ssre "(?= 2 #\b) #\x (** 4 5 #\y) #\p (** 0 6 #\q) #\A (>= 0 #\B) #\z #\z)) +(test-ssre "^(abc){1,2}zz" (: bos (** 1 2 ($ (: #\a #\b #\c))) #\z #\z)) +(test-ssre "^(b+?|a){1,2}?c" (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) +(test-ssre "^(b+|a){1,2}c" (: bos (** 1 2 ($ (or (+ #\b) #\a))) #\c)) +(test-ssre "^(b+|a){1,2}?bc" (: bos (**? 1 2 ($ (or (+ #\b) #\a))) #\b #\c)) +(test-ssre "^(b*|ba){1,2}?bc" (: bos (**? 1 2 ($ (or (* #\b) (: #\b #\a)))) #\b #\c)) +(test-ssre "^(ba|b*){1,2}?bc" (: bos (**? 1 2 ($ (or (: #\b #\a) (* #\b)))) #\b #\c)) +(test-ssre "^[ab\\]cde]" (: bos (or #\a #\b #\] #\c #\d #\e))) +(test-ssre "^[]cde]" (: bos (or #\] #\c #\d #\e))) +(test-ssre "^[^ab\\]cde]" (: bos (~ (or #\a #\b #\] #\c #\d #\e)))) +(test-ssre "^[^]cde]" (: bos (~ (or #\] #\c #\d #\e)))) +(test-ssre "^@" (: bos #\@)) +(test-ssre "^[0-9]+$" (: bos (+ (char-range #\0 #\9)) eos)) +(test-ssre "^.*nter" (: bos (* nonl) #\n #\t #\e #\r)) +(test-ssre "^xxx[0-9]+$" (: bos #\x #\x #\x (+ (char-range #\0 #\9)) eos)) +(test-ssre "^.+[0-9][0-9][0-9]$" (: bos (+ nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) +(test-ssre "^.+?[0-9][0-9][0-9]$" (: bos (**? 1 #f nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) +(test-ssre "^([^!]+)!(.+)=apquxz\\.ixr\\.zzz\\.ac\\.uk$" (: bos ($ (+ (~ #\!))) #\! ($ (+ nonl)) #\= #\a #\p #\q #\u #\x #\z #\. 
#\i #\x #\r #\. #\z #\z #\z #\. #\a #\c #\. #\u #\k eos)) +(test-ssre ":" #\:) +(test-ssre "^.*\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$" (: bos (* nonl) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) eos)) +(test-ssre "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$" (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos)) +(test-ssre "^[a-zA-Z\\d][a-zA-Z\\d\\-]*(\\.[a-zA-Z\\d][a-zA-Z\\d\\-]*)*\\.$" (: bos (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-)) (* ($ (: #\. (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-))))) #\. eos)) +(test-ssre "^\\*\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?(\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?)*$" (: bos #\* #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric))))) (* ($ (: #\. (char-range #\a #\z) (? 
($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric)))))))) eos)) +(test-ssre "^(?=ab(de))(abd)(e)" (: bos (look-ahead (: #\a #\b ($ (: #\d #\e)))) ($ (: #\a #\b #\d)) ($ #\e))) +(test-ssre "^(?!(ab)de|x)(abd)(f)" (: bos (neg-look-ahead (or (: ($ (: #\a #\b)) #\d #\e) #\x)) ($ (: #\a #\b #\d)) ($ #\f))) +(test-ssre "^(?=(ab(cd)))(ab)" (: bos (look-ahead ($ (: #\a #\b ($ (: #\c #\d))))) ($ (: #\a #\b)))) +(test-ssre "^$" (: bos eos)) +(test-ssre "(?x)^ a\\ b[c ]d $" (: bos #\a #\space #\b (or #\c #\space) #\d eos)) +(test-ssre "^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$" (: bos ($ (: #\a ($ (: #\b ($ #\c))))) ($ (: #\d ($ (: #\e ($ #\f))))) ($ (: #\h ($ (: #\i ($ #\j))))) ($ (: #\k ($ (: #\l ($ #\m))))) eos)) +(test-ssre "^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$" (: bos (: #\a ($ (: #\b ($ #\c)))) (: #\d ($ (: #\e ($ #\f)))) (: #\h ($ (: #\i ($ #\j)))) (: #\k ($ (: #\l ($ #\m)))) eos)) +(test-ssre "^[.^$|()*+?{,}]+" (: bos (+ (or #\. #\^ #\$ #\| #\( #\) #\* #\+ #\? #\{ #\, #\})))) +(test-ssre "^a*\\w" (: bos (* #\a) (or alnum #\_))) +(test-ssre "^a*?\\w" (: bos (*? #\a) (or alnum #\_))) +(test-ssre "^a+\\w" (: bos (+ #\a) (or alnum #\_))) +(test-ssre "^a+?\\w" (: bos (**? 1 #f #\a) (or alnum #\_))) +(test-ssre "^\\d{8}\\w{2,}" (: bos (= 8 numeric) (>= 2 (or alnum #\_)))) +(test-ssre "^[aeiou\\d]{4,5}$" (: bos (** 4 5 (or #\a #\e #\i #\o #\u numeric)) eos)) +(test-ssre "^[aeiou\\d]{4,5}?" (: bos (**? 
4 5 (or #\a #\e #\i #\o #\u numeric)))) +(test-ssre "^12.34" (: bos #\1 #\2 nonl #\3 #\4)) +(test-ssre "foo(?!bar)(.*)" (: #\f #\o #\o (neg-look-ahead (: #\b #\a #\r)) ($ (* nonl)))) +(test-ssre "(?:(?!foo)...|^.{0,2})bar(.*)" (: (or (: (neg-look-ahead (: #\f #\o #\o)) nonl nonl nonl) (: bos (** 0 2 nonl))) #\b #\a #\r ($ (* nonl)))) +(test-ssre "^(\\D*)(?=\\d)(?!123)" (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) +(test-ssre "(?!^)abc" (: (neg-look-ahead bos) #\a #\b #\c)) +(test-ssre "(?=^)abc" (: (look-ahead bos) #\a #\b #\c)) +(test-ssre "ab{1,3}bc" (: #\a (** 1 3 #\b) #\b #\c)) +(test-ssre "([^.]*)\\.([^:]*):[T ]+(.*)" (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* nonl)))) +(test-ssre "^[W-c]+$" (: bos (+ (char-range #\W #\c)) eos)) +(test-ssre "^abc$" (: bos #\a #\b #\c eos)) +(test-ssre "^abc$" (: bos #\a #\b #\c eos)) +(test-ssre "\\Aabc\\Z" (: bos #\a #\b #\c (: (? #\newline) eos))) +(test-ssre "\\A(.)*\\Z" (: bos (* ($ nonl)) (: (? #\newline) eos))) +(test-ssre "(?:b)|(?::+)" (or #\b (+ #\:))) +(test-ssre "[-az]+" (+ (or #\- #\a #\z))) +(test-ssre "[az-]+" (+ (or #\a #\z #\-))) +(test-ssre "[a\\-z]+" (+ (or #\a #\- #\z))) +(test-ssre "[a-z]+" (+ (char-range #\a #\z))) +(test-ssre "[\\d-]+" (+ (or numeric #\-))) +(test-ssre "abc$" (: #\a #\b #\c eos)) +(test-ssre "a{0}bc" (: (= 0 #\a) #\b #\c)) +(test-ssre "(a|(bc)){0,0}?xyz" (: (**? 0 0 ($ (or #\a ($ (: #\b #\c))))) #\x #\y #\z)) +(test-ssre "[^a]" (~ #\a)) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "[^k]$" (: (~ #\k) eos)) +(test-ssre "[^k]{2,3}$" (: (** 2 3 (~ #\k)) eos)) +(test-ssre "^\\d{8,}@.+[^k]$" (: bos (>= 8 numeric) #\@ (+ nonl) (~ #\k) eos)) +(test-ssre "[^a]" (~ #\a)) +(test-ssre "[^az]" (~ (or #\a #\z))) +(test-ssre "P[^*]TAIRE[^*]{1,6}?LL" (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 6 (~ #\*)) #\L #\L)) +(test-ssre "P[^*]TAIRE[^*]{1,}?LL" (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 
1 #f (~ #\*)) #\L #\L)) +(test-ssre "(\\.\\d\\d[1-9]?)\\d+" (: ($ (: #\. numeric numeric (? (char-range #\1 #\9)))) (+ numeric))) +(test-ssre "(\\.\\d\\d((?=0)|\\d(?=\\d)))" ($ (: #\. numeric numeric ($ (or (look-ahead #\0) (: numeric (look-ahead numeric))))))) +(test-ssre "foo(.*)bar" (: #\f #\o #\o ($ (* nonl)) #\b #\a #\r)) +(test-ssre "foo(.*?)bar" (: #\f #\o #\o ($ (*? nonl)) #\b #\a #\r)) +(test-ssre "(.*)(\\d+)" (: ($ (* nonl)) ($ (+ numeric)))) +(test-ssre "(.*?)(\\d+)" (: ($ (*? nonl)) ($ (+ numeric)))) +(test-ssre "(.*)(\\d+)$" (: ($ (* nonl)) ($ (+ numeric)) eos)) +(test-ssre "(.*?)(\\d+)$" (: ($ (*? nonl)) ($ (+ numeric)) eos)) +(test-ssre "(.*)\\b(\\d+)$" (: ($ (* nonl)) (or bow eow) ($ (+ numeric)) eos)) +(test-ssre "(.*\\D)(\\d+)$" (: ($ (: (* nonl) (~ numeric))) ($ (+ numeric)) eos)) +(test-ssre "^\\D*(?!123)" (: bos (* (~ numeric)) (neg-look-ahead (: #\1 #\2 #\3)))) +(test-ssre "^(\\D*)(?=\\d)(?!123)" (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) +(test-ssre "^[W-\\]46]" (: bos (or (char-range #\W #\]) #\4 #\6))) +(test-ssre "word (?:[a-zA-Z0-9]+ ){0,10}otherword" (: #\w #\o #\r #\d #\space (** 0 10 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) +(test-ssre "word (?:[a-zA-Z0-9]+ ){0,300}otherword" (: #\w #\o #\r #\d #\space (** 0 300 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) +(test-ssre "^(a){0,0}" (: bos (= 0 ($ #\a)))) +(test-ssre "^(a){0,1}" (: bos (** 0 1 ($ #\a)))) +(test-ssre "^(a){0,2}" (: bos (** 0 2 ($ #\a)))) +(test-ssre "^(a){0,3}" (: bos (** 0 3 ($ #\a)))) +(test-ssre "^(a){0,}" (: bos (>= 0 ($ #\a)))) +(test-ssre "^(a){1,1}" (: bos (= 1 ($ #\a)))) +(test-ssre "^(a){1,2}" (: bos (** 1 2 ($ #\a)))) +(test-ssre "^(a){1,3}" (: bos (** 1 3 ($ #\a)))) +(test-ssre "^(a){1,}" (: bos (>= 1 ($ #\a)))) +(test-ssre ".*\\.gif" (: (* nonl) #\. 
#\g #\i #\f)) +(test-ssre ".{0,}\\.gif" (: (>= 0 nonl) #\. #\g #\i #\f)) +(test-ssre ".*\\.gif" (: (* nonl) #\. #\g #\i #\f)) +(test-ssre ".*\\.gif" (: (* nonl) #\. #\g #\i #\f)) +(test-ssre ".*\\.gif" (: (* nonl) #\. #\g #\i #\f)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre "(.*X|^B)" ($ (or (: (* nonl) #\X) (: bos #\B)))) +(test-ssre "(.*X|^B)" ($ (or (: (* nonl) #\X) (: bos #\B)))) +(test-ssre "(.*X|^B)" ($ (or (: (* nonl) #\X) (: bos #\B)))) +(test-ssre "(.*X|^B)" ($ (or (: (* nonl) #\X) (: bos #\B)))) +(test-ssre "^.*B" (: bos (* nonl) #\B)) +(test-ssre "(?m)^.*B" (: bol (* nonl) #\B)) +(test-ssre "^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" (: bos (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9))) +(test-ssre "^\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d" (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) +(test-ssre "^[\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d]" (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) +(test-ssre "^[abc]{12}" (: bos (= 12 (or #\a #\b #\c)))) +(test-ssre "^[a-c]{12}" (: bos (= 12 (char-range #\a #\c)))) +(test-ssre "^(a|b|c){12}" (: bos (= 12 ($ (or #\a #\b #\c))))) +(test-ssre "^[abcdefghijklmnopqrstuvwxy0123456789]" (: bos (or #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))) +(test-ssre "abcde{0,0}" (: #\a #\b #\c #\d (= 0 #\e))) +(test-ssre 
"ab[cd]{0,0}e" (: #\a #\b (= 0 (or #\c #\d)) #\e)) +(test-ssre "ab(c){0,0}d" (: #\a #\b (= 0 ($ #\c)) #\d)) +(test-ssre "a(b*)" (: #\a ($ (* #\b)))) +(test-ssre "ab\\d{0}e" (: #\a #\b (= 0 numeric) #\e)) +(test-ssre "\"([^\\\\\"]+|\\\\.)*\"" (: #\" (* ($ (or (+ (~ (or #\\ #\"))) (: #\\ nonl)))) #\")) +(test-ssre ".*?" (*? nonl)) +(test-ssre "\\b" (or bow eow)) +(test-ssre "\\b" (or bow eow)) +(test-ssre "a[^a]b" (: #\a (~ #\a) #\b)) +(test-ssre "a.b" (: #\a nonl #\b)) +(test-ssre "a[^a]b" (: #\a (~ #\a) #\b)) +(test-ssre "a.b" (: #\a nonl #\b)) +(test-ssre "^(b+?|a){1,2}?c" (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) +(test-ssre "^(b+|a){1,2}?c" (: bos (**? 1 2 ($ (or (+ #\b) #\a))) #\c)) +(test-ssre "(?!\\A)x" (: (neg-look-ahead bos) #\x)) +(test-ssre "(A|B)*CD" (: (* ($ (or #\A #\B))) #\C #\D)) +(test-ssre "(?= 0 #\b) #\b #\c)) +(test-ssre "ab+bc" (: #\a (+ #\b) #\b #\c)) +(test-ssre "ab+bc" (: #\a (+ #\b) #\b #\c)) +(test-ssre "ab{1,}bc" (: #\a (>= 1 #\b) #\b #\c)) +(test-ssre "ab{1,3}bc" (: #\a (** 1 3 #\b) #\b #\c)) +(test-ssre "ab{3,4}bc" (: #\a (** 3 4 #\b) #\b #\c)) +(test-ssre "ab{4,5}bc" (: #\a (** 4 5 #\b) #\b #\c)) +(test-ssre "ab?bc" (: #\a (? #\b) #\b #\c)) +(test-ssre "ab{0,1}bc" (: #\a (** 0 1 #\b) #\b #\c)) +(test-ssre "ab?bc" (: #\a (? #\b) #\b #\c)) +(test-ssre "ab?c" (: #\a (? 
#\b) #\c)) +(test-ssre "ab{0,1}c" (: #\a (** 0 1 #\b) #\c)) +(test-ssre "^abc$" (: bos #\a #\b #\c eos)) +(test-ssre "^abc" (: bos #\a #\b #\c)) +(test-ssre "^abc$" (: bos #\a #\b #\c eos)) +(test-ssre "abc$" (: #\a #\b #\c eos)) +(test-ssre "^" bos) +(test-ssre "$" eos) +(test-ssre "a.c" (: #\a nonl #\c)) +(test-ssre "a.*c" (: #\a (* nonl) #\c)) +(test-ssre "a[bc]d" (: #\a (or #\b #\c) #\d)) +(test-ssre "a[b-d]e" (: #\a (char-range #\b #\d) #\e)) +(test-ssre "a[b-d]" (: #\a (char-range #\b #\d))) +(test-ssre "a[-b]" (: #\a (or #\- #\b))) +(test-ssre "a[b-]" (: #\a (or #\b #\-))) +(test-ssre "a[]]b" (: #\a #\] #\b)) +(test-ssre "a[^bc]d" (: #\a (~ (or #\b #\c)) #\d)) +(test-ssre "a[^-b]c" (: #\a (~ (or #\- #\b)) #\c)) +(test-ssre "a[^]b]c" (: #\a (~ (or #\] #\b)) #\c)) +(test-ssre "\\ba\\b" (: (or bow eow) #\a (or bow eow))) +(test-ssre "\\by\\b" (: (or bow eow) #\y (or bow eow))) +(test-ssre "\\Ba\\B" (: nwb #\a nwb)) +(test-ssre "\\By\\b" (: nwb #\y (or bow eow))) +(test-ssre "\\by\\B" (: (or bow eow) #\y nwb)) +(test-ssre "\\By\\B" (: nwb #\y nwb)) +(test-ssre "\\w" (or alnum #\_)) +(test-ssre "\\W" (~ (or alnum #\_))) +(test-ssre "a\\sb" (: #\a space #\b)) +(test-ssre "a\\Sb" (: #\a (~ space) #\b)) +(test-ssre "\\d" numeric) +(test-ssre "\\D" (~ numeric)) +(test-ssre "ab|cd" (or (: #\a #\b) (: #\c #\d))) +(test-ssre "()ef" (: ($ (:)) #\e #\f)) +(test-ssre "$b" (: eos #\b)) +(test-ssre "a\\(b" (: #\a #\( #\b)) +(test-ssre "a\\(*b" (: #\a (* #\() #\b)) +(test-ssre "a\\\\b" (: #\a #\\ #\b)) +(test-ssre "((a))" ($ ($ #\a))) +(test-ssre "(a)b(c)" (: ($ #\a) #\b ($ #\c))) +(test-ssre "a+b+c" (: (+ #\a) (+ #\b) #\c)) +(test-ssre "a{1,}b{1,}c" (: (>= 1 #\a) (>= 1 #\b) #\c)) +(test-ssre "a.+?c" (: #\a (**? 1 #f nonl) #\c)) +(test-ssre "(a+|b)*" (* ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b){0,}" (>= 0 ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b)+" (+ ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b){1,}" (>= 1 ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b)?" (? 
($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b){0,1}" (** 0 1 ($ (or (+ #\a) #\b)))) +(test-ssre "[^ab]*" (* (~ (or #\a #\b)))) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "a*" (* #\a)) +(test-ssre "([abc])*d" (: (* ($ (or #\a #\b #\c))) #\d)) +(test-ssre "([abc])*bcd" (: (* ($ (or #\a #\b #\c))) #\b #\c #\d)) +(test-ssre "a|b|c|d|e" (or #\a #\b #\c #\d #\e)) +(test-ssre "(a|b|c|d|e)f" (: ($ (or #\a #\b #\c #\d #\e)) #\f)) +(test-ssre "abcd*efg" (: #\a #\b #\c (* #\d) #\e #\f #\g)) +(test-ssre "ab*" (: #\a (* #\b))) +(test-ssre "(ab|cd)e" (: ($ (or (: #\a #\b) (: #\c #\d))) #\e)) +(test-ssre "[abhgefdc]ij" (: (or #\a #\b #\h #\g #\e #\f #\d #\c) #\i #\j)) +(test-ssre "^(ab|cd)e" (: bos ($ (or (: #\a #\b) (: #\c #\d))) #\e)) +(test-ssre "(abc|)ef" (: ($ (or (: #\a #\b #\c) (:))) #\e #\f)) +(test-ssre "(a|b)c*d" (: ($ (or #\a #\b)) (* #\c) #\d)) +(test-ssre "(ab|ab*)bc" (: ($ (or (: #\a #\b) (: #\a (* #\b)))) #\b #\c)) +(test-ssre "a([bc]*)c*" (: #\a ($ (* (or #\b #\c))) (* #\c))) +(test-ssre "a([bc]*)(c*d)" (: #\a ($ (* (or #\b #\c))) ($ (: (* #\c) #\d)))) +(test-ssre "a([bc]+)(c*d)" (: #\a ($ (+ (or #\b #\c))) ($ (: (* #\c) #\d)))) +(test-ssre "a([bc]*)(c+d)" (: #\a ($ (* (or #\b #\c))) ($ (: (+ #\c) #\d)))) +(test-ssre "a[bcd]*dcdcde" (: #\a (* (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) +(test-ssre "a[bcd]+dcdcde" (: #\a (+ (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) +(test-ssre "(ab|a)b*c" (: ($ (or (: #\a #\b) #\a)) (* #\b) #\c)) +(test-ssre "((a)(b)c)(d)" (: ($ (: ($ #\a) ($ #\b) #\c)) ($ #\d))) +(test-ssre "[a-zA-Z_][a-zA-Z0-9_]*" (: (or (char-range #\a #\z) (char-range #\A #\Z) #\_) (* (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9) #\_)))) +(test-ssre "^a(bc+|b[eh])g|.h$" (or (: bos #\a ($ (or (: #\b (+ #\c)) (: #\b (or #\e #\h)))) #\g) (: nonl #\h eos))) +(test-ssre "(bc+d$|ef*g.|h?i(j|k))" ($ (or (: #\b (+ #\c) #\d eos) (: #\e (* #\f) #\g nonl) (: (? 
#\h) #\i ($ (or #\j #\k)))))) +(test-ssre "((((((((((a))))))))))" ($ ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a))))))))))) +(test-ssre "(((((((((a)))))))))" ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a)))))))))) +(test-ssre "multiple words of text" (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s #\space #\o #\f #\space #\t #\e #\x #\t)) +(test-ssre "multiple words" (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s)) +(test-ssre "(.*)c(.*)" (: ($ (* nonl)) #\c ($ (* nonl)))) +(test-ssre "\\((.*), (.*)\\)" (: #\( ($ (* nonl)) #\, #\space ($ (* nonl)) #\))) +(test-ssre "[k]" #\k) +(test-ssre "abcd" (: #\a #\b #\c #\d)) +(test-ssre "a(bc)d" (: #\a ($ (: #\b #\c)) #\d)) +(test-ssre "a[-]?c" (: #\a (? #\-) #\c)) +(test-ssre "a(?!b)." (: #\a (neg-look-ahead #\b) nonl)) +(test-ssre "a(?=d)." (: #\a (look-ahead #\d) nonl)) +(test-ssre "a(?=c|d)." (: #\a (look-ahead (or #\c #\d)) nonl)) +(test-ssre "a(?:b|c|d)(.)" (: #\a (or #\b #\c #\d) ($ nonl))) +(test-ssre "a(?:b|c|d)*(.)" (: #\a (* (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d)+?(.)" (: #\a (**? 1 #f (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d)+(.)" (: #\a (+ (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){2}(.)" (: #\a (= 2 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){4,5}(.)" (: #\a (** 4 5 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){4,5}?(.)" (: #\a (**? 4 5 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "((foo)|(bar))*" (* ($ (or ($ (: #\f #\o #\o)) ($ (: #\b #\a #\r)))))) +(test-ssre "a(?:b|c|d){6,7}(.)" (: #\a (** 6 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){6,7}?(.)" (: #\a (**? 6 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,6}(.)" (: #\a (** 5 6 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,6}?(.)" (: #\a (**? 5 6 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,7}(.)" (: #\a (** 5 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,7}?(.)" (: #\a (**? 5 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|(c|e){1,2}?|d)+?(.)" (: #\a (**? 
1 #f (or #\b (**? 1 2 ($ (or #\c #\e))) #\d)) ($ nonl))) +(test-ssre "^(.+)?B" (: bos (? ($ (+ nonl))) #\B)) +(test-ssre "^([^a-z])|(\\^)$" (or (: bos ($ (~ (char-range #\a #\z)))) (: ($ #\^) eos))) +(test-ssre "^[<>]&" (: bos (or #\< #\>) #\&)) +(test-ssre "(?:(f)(o)(o)|(b)(a)(r))*" (* (or (: ($ #\f) ($ #\o) ($ #\o)) (: ($ #\b) ($ #\a) ($ #\r))))) +(test-ssre "(?<=a)b" (: (look-behind #\a) #\b)) +(test-ssre "(?a+)ab" (: ($ (: #\> (+ #\a))) #\a #\b)) +(test-ssre "a\\z" (: #\a eos)) +(test-ssre "(?<=\\d{3}(?!999))foo" (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)))) #\f #\o #\o)) +(test-ssre "(?<=(?!...999)\\d{3})foo" (: (look-behind (: (neg-look-ahead (: nonl nonl nonl #\9 #\9 #\9)) (= 3 numeric))) #\f #\o #\o)) +(test-ssre "(?<=\\d{3}(?!999)...)foo" (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)) nonl nonl nonl)) #\f #\o #\o)) +(test-ssre "(?<=\\d{3}...)(?= 2 (or #\a #\b))) +(test-ssre "[ab]{2,}?" (**? 2 #f (or #\a #\b))) +(test-ssre "abc(?=xyz)" (: #\a #\b #\c (look-ahead (: #\x #\y #\z)))) +(test-ssre "(?<=pqr)abc(?=xyz)" (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z)))) +(test-ssre "a\\b" (: #\a (or bow eow))) +(test-ssre "abc(?=abcde)(?=ab)" (: #\a #\b #\c (look-ahead (: #\a #\b #\c #\d #\e)) (look-ahead (: #\a #\b)))) +(test-ssre "a*?b*?" (: (*? #\a) (*? #\b))) +(test-ssre "(a)(b)|(c)" (or (: ($ #\a) ($ #\b)) ($ #\c))) +(test-ssre "(?aa)" (-> A (: #\a #\a))) +(test-ssre "a(b)c(d)" (: #\a ($ #\b) #\c ($ #\d))) +(test-ssre "^" bos) +(test-ssre "(02-)?[0-9]{3}-[0-9]{3}" (: (? 
($ (: #\0 #\2 #\-))) (= 3 (char-range #\0 #\9)) #\- (= 3 (char-range #\0 #\9)))) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "abc|bcd" (or (: #\a #\b #\c) (: #\b #\c #\d))) +(test-ssre "(?<=abc|)" (look-behind (or (: #\a #\b #\c) (:)))) +(test-ssre "(?<=abc|)" (look-behind (or (: #\a #\b #\c) (:)))) +(test-ssre "(?<=|abc)" (look-behind (or (:) (: #\a #\b #\c)))) +(test-ssre "[abc]" (or #\a #\b #\c)) +(test-ssre "foobar" (: #\f #\o #\o #\b #\a #\r)) +(test-ssre "foobar" (: #\f #\o #\o #\b #\a #\r)) +(test-ssre "(?<=pqr)abc(?=xyz)" (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z)))) +(test-ssre "\\z" eos) +(test-ssre "\\Z" (: (? #\newline) eos)) +(test-ssre "(?<=(?=.(?<=x)))" (look-behind (look-ahead (: nonl (look-behind #\x))))) +(test-ssre "(?![ab]).*" (: (neg-look-ahead (or #\a #\b)) (* nonl))) +(test-ssre "[a[]" (or #\a #\[)) +(test-ssre "\\bX" (: (or bow eow) #\X)) +(test-ssre "\\BX" (: nwb #\X)) +(test-ssre "X\\b" (: #\X (or bow eow))) +(test-ssre "X\\B" (: #\X nwb)) +(test-ssre "[^a]" (~ #\a)) +(test-ssre "a.b" (: #\a nonl #\b)) +(test-ssre "a(.{3})b" (: #\a ($ (= 3 nonl)) #\b)) +(test-ssre "a(.*?)(.)" (: #\a ($ (*? nonl)) ($ nonl))) +(test-ssre "a(.*?)(.)" (: #\a ($ (*? nonl)) ($ nonl))) +(test-ssre "a(.*)(.)" (: #\a ($ (* nonl)) ($ nonl))) +(test-ssre "a(.*)(.)" (: #\a ($ (* nonl)) ($ nonl))) +(test-ssre "a(.)(.)" (: #\a ($ nonl) ($ nonl))) +(test-ssre "a(.)(.)" (: #\a ($ nonl) ($ nonl))) +(test-ssre "a(.?)(.)" (: #\a ($ (? nonl)) ($ nonl))) +(test-ssre "a(.?)(.)" (: #\a ($ (? nonl)) ($ nonl))) +(test-ssre "a(.??)(.)" (: #\a ($ (?? nonl)) ($ nonl))) +(test-ssre "a(.??)(.)" (: #\a ($ (?? nonl)) ($ nonl))) +(test-ssre "a(.{3})b" (: #\a ($ (= 3 nonl)) #\b)) +(test-ssre "a(.{3,})b" (: #\a ($ (>= 3 nonl)) #\b)) +(test-ssre "a(.{3,}?)b" (: #\a ($ (**? 3 #f nonl)) #\b)) +(test-ssre "a(.{3,5})b" (: #\a ($ (** 3 5 nonl)) #\b)) +(test-ssre "a(.{3,5}?)b" (: #\a ($ (**? 
3 5 nonl)) #\b)) +(test-ssre "(?<=aXb)cd" (: (look-behind (: #\a #\X #\b)) #\c #\d)) +(test-ssre "(?<=(.))X" (: (look-behind ($ nonl)) #\X)) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "^[^a]{2}" (: bos (= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}" (: bos (>= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}?" (: bos (**? 2 #f (~ #\a)))) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "^[^a]{2}" (: bos (= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}" (: bos (>= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}?" (: bos (**? 2 #f (~ #\a)))) +(test-ssre "\\D" (~ numeric)) +(test-ssre ">\\S" (: #\> (~ space))) +(test-ssre "\\d" numeric) +(test-ssre "\\s" space) +(test-ssre "\\D+" (+ (~ numeric))) +(test-ssre "\\D{2,3}" (** 2 3 (~ numeric))) +(test-ssre "\\D{2,3}?" (**? 2 3 (~ numeric))) +(test-ssre "\\d+" (+ numeric)) +(test-ssre "\\d{2,3}" (** 2 3 numeric)) +(test-ssre "\\d{2,3}?" (**? 2 3 numeric)) +(test-ssre "\\S+" (+ (~ space))) +(test-ssre "\\S{2,3}" (** 2 3 (~ space))) +(test-ssre "\\S{2,3}?" (**? 2 3 (~ space))) +(test-ssre ">\\s+<" (: #\> (+ space) #\<)) +(test-ssre ">\\s{2,3}<" (: #\> (** 2 3 space) #\<)) +(test-ssre ">\\s{2,3}?<" (: #\> (**? 2 3 space) #\<)) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\w{2,3}" (** 2 3 (or alnum #\_))) +(test-ssre "\\w{2,3}?" (**? 2 3 (or alnum #\_))) +(test-ssre "\\W+" (+ (~ (or alnum #\_)))) +(test-ssre "\\W{2,3}" (** 2 3 (~ (or alnum #\_)))) +(test-ssre "\\W{2,3}?" (**? 
2 3 (~ (or alnum #\_)))) +(test-ssre "^[ac]*b" (: bos (* (or #\a #\c)) #\b)) +(test-ssre "^[^x]*b" (: bos (* (~ #\x)) #\b)) +(test-ssre "^[^x]*b" (: bos (* (~ #\x)) #\b)) +(test-ssre "^\\d*b" (: bos (* numeric) #\b)) +(test-ssre "(|a)" ($ (or (:) #\a))) +(test-ssre "abcd*" (: #\a #\b #\c (* #\d))) +(test-ssre "abcd*" (: #\a #\b #\c (* #\d))) +(test-ssre "abc\\d*" (: #\a #\b #\c (* numeric))) +(test-ssre "abc[de]*" (: #\a #\b #\c (* (or #\d #\e)))) +(test-ssre "\\bthe cat\\b" (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow))) +(test-ssre "[\\p{Nd}]" numeric) +(test-ssre "[\\p{Nd}+-]+" (+ (or numeric #\+ #\-))) +(test-ssre "[\\P{Nd}]+" (+ (~ numeric))) +(test-ssre "^[\\p{Vowel}]" (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) +(test-ssre "^[\\p{Any}]X" (: bos any #\X)) +(test-ssre "^[\\P{Any}]X" (: bos (~ any) #\X)) +(test-ssre "^[\\p{Any}]?X" (: bos (? any) #\X)) +(test-ssre "[.\\p{Lu}][.\\p{Ll}][.\\P{Lu}][.\\P{Ll}]" (: (or #\. upper) (or #\. lower) (or #\. (~ upper)) (or #\. (~ lower)))) +(test-ssre "[\\p{L}]" alpha) +(test-ssre "[\\P{L}]" (~ alpha)) +(test-ssre "[\\pLu]" (or alpha #\u)) +(test-ssre "[\\PLu]" (or (~ alpha) #\u)) +(test-ssre "\\p{Nd}" numeric) +(test-ssre "\\p{Nd}+" (+ numeric)) +(test-ssre "\\P{Nd}+" (+ (~ numeric))) +(test-ssre "^\\p{Vowel}" (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) +(test-ssre "^\\p{Any}X" (: bos any #\X)) +(test-ssre "^\\P{Any}X" (: bos (~ any) #\X)) +(test-ssre "^\\p{Any}?X" (: bos (? any) #\X)) +(test-ssre "\\p{L}" alpha) +(test-ssre "\\P{L}" (~ alpha)) +(test-ssre "\\pLu" (: alpha #\u)) +(test-ssre "\\PLu" (: (~ alpha) #\u)) +(test-ssre "\\b...\\B" (: (or bow eow) nonl nonl nonl nwb)) +(test-ssre "\\b...\\B" (: (or bow eow) nonl nonl nonl nwb)) +(test-ssre "\\b...\\B" (: (or bow eow) nonl nonl nonl nwb)) +(test-ssre "ist" (: #\i #\s #\t)) +(test-ssre "is+t" (: #\i (+ #\s) #\t)) +(test-ssre "is+?t" (: #\i (**? 1 #f #\s) #\t)) +(test-ssre "is?t" (: #\i (? 
#\s) #\t)) +(test-ssre "is{2}t" (: #\i (= 2 #\s) #\t)) +(test-ssre "^A\\s+Z" (: bos #\A (+ space) #\Z)) +(test-ssre "AskZ" (: #\A #\s #\k #\Z)) +(test-ssre "[AskZ]+" (+ (or #\A #\s #\k #\Z))) +(test-ssre "[^s]+" (+ (~ #\s))) +(test-ssre "[^s]+" (+ (~ #\s))) +(test-ssre "[^k]+" (+ (~ #\k))) +(test-ssre "[^k]+" (+ (~ #\k))) +(test-ssre "[^sk]+" (+ (~ (or #\s #\k)))) +(test-ssre "[^sk]+" (+ (~ (or #\s #\k)))) +(test-ssre "i" #\i) +(test-ssre "I" #\I) +(test-ssre "[i]" #\i) +(test-ssre "[zi]" (or #\z #\i)) +(test-ssre "[iI]" (or #\i #\I)) +(test-ssre "\\d+" (+ numeric)) +(test-ssre "\\d+" (+ numeric)) +(test-ssre ">\\s+<" (: #\> (+ space) #\<)) +(test-ssre ">\\s+<" (: #\> (+ space) #\<)) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\b.+?\\b" (: (or bow eow) (**? 1 #f nonl) (or bow eow))) +(test-ssre "caf\\B.+?\\B" (: #\c #\a #\f nwb (**? 1 #f nonl) nwb)) +(test-ssre "x{1,3}+" (+ (** 1 3 #\x))) +(test-ssre "[a]" #\a) +(test-ssre "[^a]" (~ #\a)) +(test-ssre "(?<=C\n)^" (: (look-behind (: #\C #\newline)) bos)) +(test-ssre "\\w+(?=\t)" (: (+ (or alnum #\_)) (look-ahead #\tab))) -(test-begin "srfi-257") +; new set notation tests -(test-ssre "the quick brown fox" () (: #\t #\h #\e #\space #\q #\u #\i #\c #\k #\space #\b #\r #\o #\w #\n #\space #\f #\o #\x)) -(test-ssre "a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz" () (: (* #\a) #\a #\b (? #\c) #\x #\y (+ #\z) #\p #\q (= 3 #\r) #\a (>= 2 #\b) #\x (** 4 5 #\y) #\p (** 0 6 #\q) #\A (>= 0 #\B) #\z #\z)) -(test-ssre "^(abc){1,2}zz" () (: bos (** 1 2 ($ (: #\a #\b #\c))) #\z #\z)) -(test-ssre "^(b+?|a){1,2}?c" () (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) -(test-ssre "^(b+|a){1,2}c" () (: bos (** 1 2 ($ (or (+ #\b) #\a))) #\c)) -(test-ssre "^(ba|b*){1,2}?bc" () (: bos (**? 
1 2 ($ (or (: #\b #\a) (* #\b)))) #\b #\c)) -(test-ssre "^[ab\\]cde]" () (: bos (or #\a #\b #\] #\c #\d #\e))) -(test-ssre "^[]cde]" () (: bos (or #\] #\c #\d #\e))) -(test-ssre "^[^ab\\]cde]" () (: bos (~ (or #\a #\b #\] #\c #\d #\e)))) -(test-ssre "^[^]cde]" () (: bos (~ (or #\] #\c #\d #\e)))) -(test-ssre "^@" () (: bos #\@)) -(test-ssre "^[0-9]+$" () (: bos (+ (char-range #\0 #\9)) eos)) -(test-ssre "^.*nter" () (: bos (* nonl) #\n #\t #\e #\r)) -(test-ssre "^xxx[0-9]+$" () (: bos #\x #\x #\x (+ (char-range #\0 #\9)) eos)) -(test-ssre "^.+[0-9][0-9][0-9]$" () (: bos (+ nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) -(test-ssre "^.+?[0-9][0-9][0-9]$" () (: bos (**? 1 #f nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) -(test-ssre "^([^!]+)!(.+)=apquxz\\.ixr\\.zzz\\.ac\\.uk$" () (: bos ($ (+ (~ #\!))) #\! ($ (+ nonl)) #\= #\a #\p #\q #\u #\x #\z #\. #\i #\x #\r #\. #\z #\z #\z #\. #\a #\c #\. #\u #\k eos)) -(test-ssre ":" () #\:) -(test-ssre "([\\da-f:]+)$" () (: ($ (+ (or numeric (char-range #\a #\f) #\:))) eos)) -(test-ssre "^.*\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$" () (: bos (* nonl) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) eos)) -(test-ssre "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$" () (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos)) -(test-ssre "^[a-zA-Z\\d][a-zA-Z\\d\\-]*(\\.[a-zA-Z\\d][a-zA-Z\\d\\-]*)*\\.$" () (: bos (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-)) (* ($ (: #\. (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-))))) #\. eos)) -(test-ssre "^\\*\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?(\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?)*$" () (: bos #\* #\. (char-range #\a #\z) (? 
($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric))))) (* ($ (: #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric)))))))) eos)) -(test-ssre "^(?=ab(de))(abd)(e)" () (: bos (look-ahead (: #\a #\b ($ (: #\d #\e)))) ($ (: #\a #\b #\d)) ($ #\e))) -(test-ssre "^(?!(ab)de|x)(abd)(f)" () (: bos (neg-look-ahead (or (: ($ (: #\a #\b)) #\d #\e) #\x)) ($ (: #\a #\b #\d)) ($ #\f))) -(test-ssre "^(?=(ab(cd)))(ab)" () (: bos (look-ahead ($ (: #\a #\b ($ (: #\c #\d))))) ($ (: #\a #\b)))) -(test-ssre "^[\\da-f](\\.[\\da-f])*$" () (: bos (or numeric (char-range #\a #\f)) (* ($ (: #\. (or numeric (char-range #\a #\f))))) eos)) -(test-ssre "^\".*\"\\s*(;.*)?$" () (: bos #\" (* nonl) #\" (* space) (? ($ (: #\; (* nonl)))) eos)) -(test-ssre "^$" () (: bos eos)) -(test-ssre "^ a\\ b[c ]d $" (x) (: bos #\a #\space #\b (or #\c #\space) #\d eos)) -(test-ssre "^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$" () (: bos ($ (: #\a ($ (: #\b ($ #\c))))) ($ (: #\d ($ (: #\e ($ #\f))))) ($ (: #\h ($ (: #\i ($ #\j))))) ($ (: #\k ($ (: #\l ($ #\m))))) eos)) -(test-ssre "^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$" () (: bos (: #\a ($ (: #\b ($ #\c)))) (: #\d ($ (: #\e ($ #\f)))) (: #\h ($ (: #\i ($ #\j)))) (: #\k ($ (: #\l ($ #\m)))) eos)) -(test-ssre "^[\\w][\\W][\\s][\\S][\\d][\\D]\\]" () (: bos (or alnum #\_) (~ (or alnum #\_)) space (~ space) numeric (~ numeric) #\])) -(test-ssre "^[.^$|()*+?{,}]+" () (: bos (+ (or #\. #\^ #\$ #\| #\( #\) #\* #\+ #\? #\{ #\, #\})))) -(test-ssre "^a*\\w" () (: bos (* #\a) (or alnum #\_))) -(test-ssre "^a*?\\w" () (: bos (*? #\a) (or alnum #\_))) -(test-ssre "^a+\\w" () (: bos (+ #\a) (or alnum #\_))) -(test-ssre "^a+?\\w" () (: bos (**? 1 #f #\a) (or alnum #\_))) -(test-ssre "^\\d{8}\\w{2,}" () (: bos (= 8 numeric) (>= 2 (or alnum #\_)))) -(test-ssre "^[aeiou\\d]{4,5}$" () (: bos (** 4 5 (or #\a #\e #\i #\o #\u numeric)) eos)) -(test-ssre "^[aeiou\\d]{4,5}?" () (: bos (**? 
4 5 (or #\a #\e #\i #\o #\u numeric)))) -(test-ssre "^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]" () (: bos #\F #\r #\o #\m (+ #\space) ($ (+ (~ #\space))) (+ #\space) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (+ #\space) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (+ #\space) (? (char-range #\0 #\9)) (char-range #\0 #\9) (+ #\space) (char-range #\0 #\9) (char-range #\0 #\9) #\: (char-range #\0 #\9) (char-range #\0 #\9))) -(test-ssre "^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" () (: bos #\F #\r #\o #\m (+ space) (+ (~ space)) (+ space) (= 2 ($ (: (= 3 (or (char-range #\a #\z) (char-range #\A #\Z))) (+ space)))) (** 1 2 numeric) (+ space) numeric numeric #\: numeric numeric)) -(test-ssre "^12.34" () (: bos #\1 #\2 nonl #\3 #\4)) -(test-ssre "foo(?!bar)(.*)" () (: #\f #\o #\o (neg-look-ahead (: #\b #\a #\r)) ($ (* nonl)))) -(test-ssre "(?:(?!foo)...|^.{0,2})bar(.*)" () (: (or (: (neg-look-ahead (: #\f #\o #\o)) nonl nonl nonl) (: bos (** 0 2 nonl))) #\b #\a #\r ($ (* nonl)))) -(test-ssre "^(\\D*)(?=\\d)(?!123)" () (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) -(test-ssre "(?!^)abc" () (: (neg-look-ahead bos) #\a #\b #\c)) -(test-ssre "(?=^)abc" () (: (look-ahead bos) #\a #\b #\c)) -(test-ssre "^[ab]{1,3}(ab*|b)" () (: bos (** 1 3 (or #\a #\b)) ($ (or (: #\a (* #\b)) #\b)))) -(test-ssre "^[ab]{1,3}?(ab*|b)" () (: bos (**? 1 3 (or #\a #\b)) ($ (or (: #\a (* #\b)) #\b)))) -(test-ssre "^[ab]{1,3}?(ab*?|b)" () (: bos (**? 1 3 (or #\a #\b)) ($ (or (: #\a (*? #\b)) #\b)))) -(test-ssre "^[ab]{1,3}(ab*?|b)" () (: bos (** 1 3 (or #\a #\b)) ($ (or (: #\a (*? 
#\b)) #\b)))) -(test-ssre "ab{1,3}bc" () (: #\a (** 1 3 #\b) #\b #\c)) -(test-ssre "([^.]*)\\.([^:]*):[T ]+(.*)" () (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* nonl)))) -(test-ssre "^[W-c]+$" () (: bos (+ (char-range #\W #\c)) eos)) -(test-ssre "^[?-_]+$" () (: bos (+ (char-range #\? #\_)) eos)) +(test-ssre "{Nd}" numeric) +(test-ssre "{Nd|[+]|[-]}+" (+ (or numeric #\+ #\-))) +(test-ssre "{~Nd}+" (+ (~ numeric))) +(test-ssre "^{Vowel}" (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) +(test-ssre "^{Any}X" (: bos any #\X)) +(test-ssre "^{~Any}X" (: bos (~ any) #\X)) +(test-ssre "^{Any}?X" (: bos (? any) #\X)) +(test-ssre "{[.]|Lu}{[.]|Ll}{[.]|~Lu}{[.]|~Ll}" (: (or #\. upper) (or #\. lower) (or #\. (~ upper)) (or #\. (~ lower)))) +(test-ssre "{L}" alpha) +(test-ssre "{~L}" (~ alpha)) +(test-ssre "{L}u" (: alpha #\u)) +(test-ssre "{~L}u" (: (~ alpha) #\u)) +(test-ssre "{L-Vowel}u" (: (- alpha (or #\A #\E #\I #\O #\U #\Y #\W)) #\u)) +(test-ssre "{Nd}" numeric) +(test-ssre "{Nd}+" (+ numeric)) +(test-ssre "{~Nd}+" (+ (~ numeric))) +(test-ssre "^{Vowel}" (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) +(test-ssre "^{Any}X" (: bos any #\X)) +(test-ssre "^{~Any}X" (: bos (~ any) #\X)) +(test-ssre "^{Any}?X" (: bos (? 
any) #\X)) +(test-ssre "{u}{l|d}*" (: upper (* (or lower numeric)))) +(test-ssre "{~d}{an|[']}*" (: (~ numeric) (* (or alnum #\')))) +(test-ssre "{<}{u&~Vowel|d}{!b}{an-d}*{>}" (: bow (or (- upper (or #\A #\E #\I #\O #\U #\Y #\W)) numeric) nwb (* (- alnum numeric)) eow)) +(test-ssre "{}\\X*" (: grapheme (* grapheme))) +; selected corner cases +(test-ssre "a{}b" (: #\a (or) #\b)) +(test-ssre "a{{}}b" (: #\a (or) #\b)) +(test-ssre "a{{}|{}|{}}b" (: #\a (or) #\b)) +(test-ssre "a{{}&{}}b" (: #\a (or) #\b)) +(test-ssre "a{{}&{{}-{}-{}}}b" (: #\a (or) #\b)) +(test-ssre "{?i-u:?u:an&{l|d}}" (w/nocase (w/ascii (w/unicode (and alnum (or lower numeric)))))) +(test-ssre "(?x){ ?i-u: ?u: an & { l | d } }" (w/nocase (w/ascii (w/unicode (and alnum (or lower numeric)))))) +(test-ssre "(?x){ ?i-u: ?u: an & { l | d } }" (w/nocase (w/ascii (w/unicode (and alnum (or lower numeric)))))) +(test-ssre "{?x: an & { ?i-u: l | d } }" (and alnum (w/nocase (w/ascii (or lower numeric))))) +(test-ssre "{an&{?i:{?-u:l|d}}}" (and alnum (w/nocase (w/ascii (or lower numeric))))) +(test-ssre "(?i)(?-u){l|d}" (w/nocase (w/ascii (or lower numeric)))) -(test-ssre "^abc$" () (: bos #\a #\b #\c eos)) -(test-ssre "\\Aabc\\z" () (: bos #\a #\b #\c eos)) -(test-ssre "\\A(.)*\\z" () (: bos (* ($ nonl)) eos)) -(test-ssre "(?:b)|(?::+)" () (or #\b (+ #\:))) -(test-ssre "[-az]+" () (+ (or #\- #\a #\z))) -(test-ssre "[az-]+" () (+ (or #\a #\z #\-))) -(test-ssre "[a\\-z]+" () (+ (or #\a #\- #\z))) -(test-ssre "[a-z]+" () (+ (char-range #\a #\z))) -(test-ssre "[\\d-]+" () (+ (or numeric #\-))) -(test-ssre "\\\\" () #\\) -(test-ssre "a{0}bc" () (: (= 0 #\a) #\b #\c)) -(test-ssre "(a|(bc)){0,0}?xyz" () (: (**? 
0 0 ($ (or #\a ($ (: #\b #\c))))) #\x #\y #\z)) -(test-ssre "^([^a])([^b])([^c]*)([^d]{3,4})" () (: bos ($ (~ #\a)) ($ (~ #\b)) ($ (* (~ #\c))) ($ (** 3 4 (~ #\d))))) -(test-ssre "[^a]" () (~ #\a)) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "[^k]$" () (: (~ #\k) eos)) -(test-ssre "[^k]{2,3}$" () (: (** 2 3 (~ #\k)) eos)) -(test-ssre "^\\d{8,}@.+[^k]$" () (: bos (>= 8 numeric) #\@ (+ nonl) (~ #\k) eos)) -(test-ssre "[^a]" () (~ #\a)) -(test-ssre "[^az]" () (~ (or #\a #\z))) -(test-ssre "P[^*]TAIRE[^*]{1,6}?LL" () (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 6 (~ #\*)) #\L #\L)) -(test-ssre "P[^*]TAIRE[^*]{1,}?LL" () (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 #f (~ #\*)) #\L #\L)) -(test-ssre "(\\.\\d\\d[1-9]?)\\d+" () (: ($ (: #\. numeric numeric (? (char-range #\1 #\9)))) (+ numeric))) -(test-ssre "(\\.\\d\\d((?=0)|\\d(?=\\d)))" () ($ (: #\. numeric numeric ($ (or (look-ahead #\0) (: numeric (look-ahead numeric))))))) -(test-ssre "\\b(foo)\\s+(\\w+)" () (: (or bow eow) ($ (: #\f #\o #\o)) (+ space) ($ (+ (or alnum #\_))))) -(test-ssre "foo(.*)bar" () (: #\f #\o #\o ($ (* nonl)) #\b #\a #\r)) -(test-ssre "foo(.*?)bar" () (: #\f #\o #\o ($ (*? nonl)) #\b #\a #\r)) -(test-ssre "(.*)(\\d*)" () (: ($ (* nonl)) ($ (* numeric)))) -(test-ssre "(.*)(\\d+)" () (: ($ (* nonl)) ($ (+ numeric)))) -(test-ssre "(.*?)(\\d*)" () (: ($ (*? nonl)) ($ (* numeric)))) -(test-ssre "(.*?)(\\d+)" () (: ($ (*? nonl)) ($ (+ numeric)))) -(test-ssre "(.*)(\\d+)$" () (: ($ (* nonl)) ($ (+ numeric)) eos)) -(test-ssre "(.*?)(\\d+)$" () (: ($ (*? 
nonl)) ($ (+ numeric)) eos)) -(test-ssre "(.*)\\b(\\d+)$" () (: ($ (* nonl)) (or bow eow) ($ (+ numeric)) eos)) -(test-ssre "(.*\\D)(\\d+)$" () (: ($ (: (* nonl) (~ numeric))) ($ (+ numeric)) eos)) -(test-ssre "^\\D*(?!123)" () (: bos (* (~ numeric)) (neg-look-ahead (: #\1 #\2 #\3)))) -(test-ssre "^(\\D*)(?=\\d)(?!123)" () (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) -(test-ssre "^[W-]46\\]" () (: bos (or #\W #\-) #\4 #\6 #\])) -(test-ssre "^[W-\\]46]" () (: bos (or (char-range #\W #\]) #\4 #\6))) -(test-ssre "word (?:[a-zA-Z0-9]+ ){0,10}otherword" () (: #\w #\o #\r #\d #\space (** 0 10 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) -(test-ssre "word (?:[a-zA-Z0-9]+ ){0,300}otherword" () (: #\w #\o #\r #\d #\space (** 0 300 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) -(test-ssre "^(a){0,0}" () (: bos (= 0 ($ #\a)))) -(test-ssre "^(a){0,1}" () (: bos (** 0 1 ($ #\a)))) -(test-ssre "^(a){0,2}" () (: bos (** 0 2 ($ #\a)))) -(test-ssre "^(a){0,3}" () (: bos (** 0 3 ($ #\a)))) -(test-ssre "^(a){0,}" () (: bos (>= 0 ($ #\a)))) -(test-ssre "^(a){1,1}" () (: bos (= 1 ($ #\a)))) -(test-ssre "^(a){1,2}" () (: bos (** 1 2 ($ #\a)))) -(test-ssre "^(a){1,3}" () (: bos (** 1 3 ($ #\a)))) -(test-ssre "^(a){1,}" () (: bos (>= 1 ($ #\a)))) -(test-ssre ".*\\.gif" () (: (* nonl) #\. #\g #\i #\f)) -(test-ssre ".{0,}\\.gif" () (: (>= 0 nonl) #\. #\g #\i #\f)) -(test-ssre ".*\\.gif" () (: (* nonl) #\. #\g #\i #\f)) -(test-ssre ".*\\.gif" () (: (* nonl) #\. #\g #\i #\f)) -(test-ssre ".*\\.gif" () (: (* nonl) #\. 
#\g #\i #\f)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre "(.*X|^B)" () ($ (or (: (* nonl) #\X) (: bos #\B)))) -(test-ssre "^.*B" () (: bos (* nonl) #\B)) -(test-ssre "(?m)^.*B" () (: bol (* nonl) #\B)) -(test-ssre "^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" () (: bos (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9))) -(test-ssre "^\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d" () (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) -(test-ssre "^[\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d]" () (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) -(test-ssre "^[abc]{12}" () (: bos (= 12 (or #\a #\b #\c)))) -(test-ssre "^[a-c]{12}" () (: bos (= 12 (char-range #\a #\c)))) -(test-ssre "^(a|b|c){12}" () (: bos (= 12 ($ (or #\a #\b #\c))))) -(test-ssre "^[abcdefghijklmnopqrstuvwxy0123456789]" () (: bos (or #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))) -(test-ssre "abcde{0,0}" () (: #\a #\b #\c #\d (= 0 #\e))) -(test-ssre "ab[cd]{0,0}e" () (: #\a #\b (= 0 (or #\c #\d)) #\e)) -(test-ssre "ab(c){0,0}d" () (: #\a #\b (= 0 ($ #\c)) #\d)) -(test-ssre "a(b*)" () (: #\a ($ (* #\b)))) -(test-ssre "ab\\d{0}e" () (: #\a #\b (= 0 numeric) #\e)) -(test-ssre "\"([^\\\\\"]+|\\\\.)*\"" () (: #\" (* ($ (or (+ (~ (or #\\ #\"))) (: #\\ nonl)))) #\")) -(test-ssre ".*?" () (*? 
nonl)) -(test-ssre "\\b" () (or bow eow)) -(test-ssre "\\b" () (or bow eow)) -(test-ssre "a[^a]b" () (: #\a (~ #\a) #\b)) -(test-ssre "a.b" () (: #\a nonl #\b)) -(test-ssre "a[^a]b" () (: #\a (~ #\a) #\b)) -(test-ssre "a.b" () (: #\a nonl #\b)) -(test-ssre "^(b+?|a){1,2}?c" () (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) -(test-ssre "^(b+|a){1,2}?c" () (: bos (**? 1 2 ($ (or (+ #\b) #\a))) #\c)) -(test-ssre "(?!\\A)x" () (: (neg-look-ahead bos) #\x)) -(test-ssre "(A|B)*?CD" () (: (*? ($ (or #\A #\B))) #\C #\D)) -(test-ssre "(A|B)*CD" () (: (* ($ (or #\A #\B))) #\C #\D)) -(test-ssre "(?= 0 #\b) #\b #\c)) -(test-ssre "ab+bc" () (: #\a (+ #\b) #\b #\c)) -(test-ssre "ab{1,}bc" () (: #\a (>= 1 #\b) #\b #\c)) -(test-ssre "ab+bc" () (: #\a (+ #\b) #\b #\c)) -(test-ssre "ab{1,}bc" () (: #\a (>= 1 #\b) #\b #\c)) -(test-ssre "ab{1,3}bc" () (: #\a (** 1 3 #\b) #\b #\c)) -(test-ssre "ab{3,4}bc" () (: #\a (** 3 4 #\b) #\b #\c)) -(test-ssre "ab{4,5}bc" () (: #\a (** 4 5 #\b) #\b #\c)) -(test-ssre "ab?bc" () (: #\a (? #\b) #\b #\c)) -(test-ssre "ab{0,1}bc" () (: #\a (** 0 1 #\b) #\b #\c)) -(test-ssre "ab?bc" () (: #\a (? #\b) #\b #\c)) -(test-ssre "ab?c" () (: #\a (? 
#\b) #\c)) -(test-ssre "ab{0,1}c" () (: #\a (** 0 1 #\b) #\c)) -(test-ssre "^abc$" () (: bos #\a #\b #\c eos)) -(test-ssre "^abc" () (: bos #\a #\b #\c)) -(test-ssre "^abc$" () (: bos #\a #\b #\c eos)) -(test-ssre "abc$" () (: #\a #\b #\c eos)) -(test-ssre "^" () bos) -(test-ssre "$" () eos) -(test-ssre "a.c" () (: #\a nonl #\c)) -(test-ssre "a.*c" () (: #\a (* nonl) #\c)) -(test-ssre "a[bc]d" () (: #\a (or #\b #\c) #\d)) -(test-ssre "a[b-d]e" () (: #\a (char-range #\b #\d) #\e)) -(test-ssre "a[b-d]" () (: #\a (char-range #\b #\d))) -(test-ssre "a[-b]" () (: #\a (or #\- #\b))) -(test-ssre "a[b-]" () (: #\a (or #\b #\-))) -(test-ssre "a\\]" () (: #\a #\])) -(test-ssre "a[]]b" () (: #\a #\] #\b)) -(test-ssre "a[^bc]d" () (: #\a (~ (or #\b #\c)) #\d)) -(test-ssre "a[^-b]c" () (: #\a (~ (or #\- #\b)) #\c)) -(test-ssre "a[^]b]c" () (: #\a (~ (or #\] #\b)) #\c)) -(test-ssre "\\ba\\b" () (: (or bow eow) #\a (or bow eow))) -(test-ssre "\\by\\b" () (: (or bow eow) #\y (or bow eow))) -(test-ssre "\\Ba\\B" () (: nwb #\a nwb)) -(test-ssre "\\By\\b" () (: nwb #\y (or bow eow))) -(test-ssre "\\by\\B" () (: (or bow eow) #\y nwb)) -(test-ssre "\\By\\B" () (: nwb #\y nwb)) -(test-ssre "\\w" () (or alnum #\_)) -(test-ssre "\\W" () (~ (or alnum #\_))) -(test-ssre "a\\sb" () (: #\a space #\b)) -(test-ssre "a\\Sb" () (: #\a (~ space) #\b)) -(test-ssre "\\d" () numeric) -(test-ssre "\\D" () (~ numeric)) -(test-ssre "ab|cd" () (or (: #\a #\b) (: #\c #\d))) -(test-ssre "()ef" () (: ($ (:)) #\e #\f)) -(test-ssre "$b" () (: eos #\b)) -(test-ssre "a\\(b" () (: #\a #\( #\b)) -(test-ssre "a\\(*b" () (: #\a (* #\() #\b)) -(test-ssre "a\\\\b" () (: #\a #\\ #\b)) -(test-ssre "((a))" () ($ ($ #\a))) -(test-ssre "(a)b(c)" () (: ($ #\a) #\b ($ #\c))) -(test-ssre "a+b+c" () (: (+ #\a) (+ #\b) #\c)) -(test-ssre "a{1,}b{1,}c" () (: (>= 1 #\a) (>= 1 #\b) #\c)) -(test-ssre "a.+?c" () (: #\a (**? 
1 #f nonl) #\c)) -(test-ssre "(a+|b)*" () (* ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b){0,}" () (>= 0 ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b)+" () (+ ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b){1,}" () (>= 1 ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b)?" () (? ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b){0,1}" () (** 0 1 ($ (or (+ #\a) #\b)))) -(test-ssre "[^ab]*" () (* (~ (or #\a #\b)))) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "a*" () (* #\a)) -(test-ssre "([abc])*d" () (: (* ($ (or #\a #\b #\c))) #\d)) -(test-ssre "([abc])*bcd" () (: (* ($ (or #\a #\b #\c))) #\b #\c #\d)) -(test-ssre "a|b|c|d|e" () (or #\a #\b #\c #\d #\e)) -(test-ssre "(a|b|c|d|e)f" () (: ($ (or #\a #\b #\c #\d #\e)) #\f)) -(test-ssre "abcd*efg" () (: #\a #\b #\c (* #\d) #\e #\f #\g)) -(test-ssre "ab*" () (: #\a (* #\b))) -(test-ssre "(ab|cd)e" () (: ($ (or (: #\a #\b) (: #\c #\d))) #\e)) -(test-ssre "[abhgefdc]ij" () (: (or #\a #\b #\h #\g #\e #\f #\d #\c) #\i #\j)) -(test-ssre "^(ab|cd)e" () (: bos ($ (or (: #\a #\b) (: #\c #\d))) #\e)) -(test-ssre "(abc|)ef" () (: ($ (or (: #\a #\b #\c) (:))) #\e #\f)) -(test-ssre "(a|b)c*d" () (: ($ (or #\a #\b)) (* #\c) #\d)) -(test-ssre "(ab|ab*)bc" () (: ($ (or (: #\a #\b) (: #\a (* #\b)))) #\b #\c)) -(test-ssre "a([bc]*)c*" () (: #\a ($ (* (or #\b #\c))) (* #\c))) -(test-ssre "a([bc]*)(c*d)" () (: #\a ($ (* (or #\b #\c))) ($ (: (* #\c) #\d)))) -(test-ssre "a([bc]+)(c*d)" () (: #\a ($ (+ (or #\b #\c))) ($ (: (* #\c) #\d)))) -(test-ssre "a([bc]*)(c+d)" () (: #\a ($ (* (or #\b #\c))) ($ (: (+ #\c) #\d)))) -(test-ssre "a[bcd]*dcdcde" () (: #\a (* (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) -(test-ssre "a[bcd]+dcdcde" () (: #\a (+ (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) -(test-ssre "(ab|a)b*c" () (: ($ (or (: #\a #\b) #\a)) (* #\b) #\c)) -(test-ssre "((a)(b)c)(d)" () (: ($ (: ($ #\a) ($ #\b) #\c)) ($ #\d))) -(test-ssre "[a-zA-Z_][a-zA-Z0-9_]*" () (: (or (char-range #\a #\z) (char-range #\A #\Z) #\_) (* (or (char-range #\a #\z) (char-range #\A #\Z) 
(char-range #\0 #\9) #\_)))) -(test-ssre "^a(bc+|b[eh])g|.h$" () (or (: bos #\a ($ (or (: #\b (+ #\c)) (: #\b (or #\e #\h)))) #\g) (: nonl #\h eos))) -(test-ssre "(bc+d$|ef*g.|h?i(j|k))" () ($ (or (: #\b (+ #\c) #\d eos) (: #\e (* #\f) #\g nonl) (: (? #\h) #\i ($ (or #\j #\k)))))) -(test-ssre "((((((((((a))))))))))" () ($ ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a))))))))))) -(test-ssre "(((((((((a)))))))))" () ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a)))))))))) -(test-ssre "multiple words of text" () (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s #\space #\o #\f #\space #\t #\e #\x #\t)) -(test-ssre "multiple words" () (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s)) -(test-ssre "(.*)c(.*)" () (: ($ (* nonl)) #\c ($ (* nonl)))) -(test-ssre "\\((.*), (.*)\\)" () (: #\( ($ (* nonl)) #\, #\space ($ (* nonl)) #\))) -(test-ssre "[k]" () #\k) -(test-ssre "abcd" () (: #\a #\b #\c #\d)) -(test-ssre "a(bc)d" () (: #\a ($ (: #\b #\c)) #\d)) -(test-ssre "a[-]?c" () (: #\a (? #\-) #\c)) -(test-ssre "a(?!b)." () (: #\a (neg-look-ahead #\b) nonl)) -(test-ssre "a(?=d)." () (: #\a (look-ahead #\d) nonl)) -(test-ssre "a(?=c|d)." () (: #\a (look-ahead (or #\c #\d)) nonl)) -(test-ssre "a(?:b|c|d)(.)" () (: #\a (or #\b #\c #\d) ($ nonl))) -(test-ssre "a(?:b|c|d)*(.)" () (: #\a (* (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d)+?(.)" () (: #\a (**? 1 #f (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d)+(.)" () (: #\a (+ (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){2}(.)" () (: #\a (= 2 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){4,5}(.)" () (: #\a (** 4 5 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){4,5}?(.)" () (: #\a (**? 4 5 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){6,7}(.)" () (: #\a (** 6 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){6,7}?(.)" () (: #\a (**? 6 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,6}(.)" () (: #\a (** 5 6 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,6}?(.)" () (: #\a (**? 
5 6 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,7}(.)" () (: #\a (** 5 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,7}?(.)" () (: #\a (**? 5 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|(c|e){1,2}?|d)+?(.)" () (: #\a (**? 1 #f (or #\b (**? 1 2 ($ (or #\c #\e))) #\d)) ($ nonl))) -(test-ssre "^(.+)?B" () (: bos (? ($ (+ nonl))) #\B)) -(test-ssre "^([^a-z])|(\\^)$" () (or (: bos ($ (~ (char-range #\a #\z)))) (: ($ #\^) eos))) -(test-ssre "^[<>]&" () (: bos (or #\< #\>) #\&)) -(test-ssre "(?<=a)b" () (: (look-behind #\a) #\b)) -(test-ssre "(?a+)ab" () (: ($ (: #\> (+ #\a))) #\a #\b)) -(test-ssre "b\\z" () (: #\b eos)) -(test-ssre "(?<=\\d{3}(?!999))foo" () (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)))) #\f #\o #\o)) -(test-ssre "(?<=(?!...999)\\d{3})foo" () (: (look-behind (: (neg-look-ahead (: nonl nonl nonl #\9 #\9 #\9)) (= 3 numeric))) #\f #\o #\o)) -(test-ssre "(?<=\\d{3}(?!999)...)foo" () (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)) nonl nonl nonl)) #\f #\o #\o)) -(test-ssre "(?<=\\d{3}...)(?= 0 #\b) (or alnum #\_))) -(test-ssre "a*\\d*\\w" () (: (* #\a) (* numeric) (or alnum #\_))) -(test-ssre "a*b *\\w" (x) (: (* #\a) (* #\b) (or alnum #\_))) -(test-ssre "a* b *\\w" (x) (: (* #\a) (* #\b) (or alnum #\_))) -(test-ssre "\\z(?= 2 ($ (** 2 3 #\a)))) #\a)) -(test-ssre "(?=C)" () (look-ahead #\C)) -(test-ssre "(?:a(? (?')|(?\")) |b(? (?')|(?\")) ) (\\k[a-z]+|[0-9]+)" () (: (or (: #\a (-> quote (or (: #\space (-> apostrophe #\')) (-> realquote #\"))) #\space) (: #\b (-> quote (or (: #\space (-> apostrophe #\')) (-> realquote #\"))) #\space)) #\space ($ (or (: (backref quote) (+ (char-range #\a #\z))) (+ (char-range #\0 #\9)))))) -(test-ssre "^(a){2,}+(\\w)" () (: bos (+ (>= 2 ($ #\a))) ($ (or alnum #\_)))) -(test-ssre "^(?:a){2,}+(\\w)" () (: bos (+ (>= 2 #\a)) ($ (or alnum #\_)))) -(test-ssre "\\A.*?(a|bc)" () (: bos (*? nonl) ($ (or #\a (: #\b #\c))))) -(test-ssre "\\A.*?(?:a|bc|d)" () (: bos (*? 
nonl) (or #\a (: #\b #\c) #\d))) -(test-ssre "(?:.*?a)(?<=ba)" () (: (*? nonl) #\a (look-behind (: #\b #\a)))) -(test-ssre "a(?=bc).|abd" () (or (: #\a (look-ahead (: #\b #\c)) nonl) (: #\a #\b #\d))) -(test-ssre "\\A.*?(?:a|bc)" () (: bos (*? nonl) (or #\a (: #\b #\c)))) -(test-ssre "^\\d*\\w{4}" () (: bos (* numeric) (= 4 (or alnum #\_)))) -(test-ssre "^[^b]*\\w{4}" () (: bos (* (~ #\b)) (= 4 (or alnum #\_)))) -(test-ssre "^a*\\w{4}" () (: bos (* #\a) (= 4 (or alnum #\_)))) -(test-ssre "(?:(?foo)|(?bar))\\k" () (: (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n))) -(test-ssre "(?A)(?:(?foo)|(?bar))\\k" () (: (-> n #\A) (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n))) -(test-ssre "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$" () (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos)) -(test-ssre "(?:x|(?:(xx|yy)+|x|x|x|x|x)|a|a|a)bc" () (: (or #\x (or (+ ($ (or (: #\x #\x) (: #\y #\y)))) #\x #\x #\x #\x #\x) #\a #\a #\a) #\b #\c)) -(test-ssre "\\sabc" () (: space #\a #\b #\c)) -(test-ssre "Z*(|d*){216}" () (: (* #\Z) (= 216 ($ (or (:) (* #\d)))))) -(test-ssre "(?<=a(B){0}c)X" () (: (look-behind (: #\a (= 0 ($ #\B)) #\c)) #\X)) -(test-ssre "a+(?:|b)a" () (: (+ #\a) (or (:) #\b) #\a)) -(test-ssre "X?(R||){3335}" () (: (? #\X) (= 3335 ($ (or #\R (:) (:)))))) -(test-ssre "(?!(b))c|b" () (or (: (neg-look-ahead ($ #\b)) #\c) #\b)) -(test-ssre "(?=(b))b|c" () (or (: (look-ahead ($ #\b)) #\b) #\c)) -(test-ssre "<(?x:[a b])>" () (: #\< (or #\a #\space #\b) #\>)) -(test-ssre "<(?:[a b])>" () (: #\< (or #\a #\space #\b) #\>)) -(test-ssre "<(?xxx:[a b])>" () (: #\< (or #\a #\space #\b) #\>)) -(test-ssre "<(?-x:[a b])>" () (: #\< (or #\a #\space #\b) #\>)) -(test-ssre "[[:digit:]-]+" () (+ (or numeric #\-))) -(test-ssre "(?<=(?=.)?)" () (look-behind (? 
(look-ahead nonl)))) -(test-ssre "(?<=(?=.){4,5})" () (look-behind (** 4 5 (look-ahead nonl)))) -(test-ssre "(?<=(?=.){4,5}x)" () (look-behind (: (** 4 5 (look-ahead nonl)) #\x))) -(test-ssre " (? \\w+ )* \\. " () (: #\space #\space #\space (* (-> word (: #\space (+ (or alnum #\_)) #\space))) #\space #\space #\space #\space #\. #\space #\space #\space)) -(test-ssre "(?<=(?=.(?<=x)))" () (look-behind (look-ahead (: nonl (look-behind #\x))))) -(test-ssre "(?<=(?=(?<=a)))b" () (: (look-behind (look-ahead (look-behind #\a))) #\b)) -(test-ssre "(?<=ab?c)..." () (: (look-behind (: #\a (? #\b) #\c)) nonl nonl nonl)) -(test-ssre "(?<=PQR|ab?c)..." () (: (look-behind (or (: #\P #\Q #\R) (: #\a (? #\b) #\c))) nonl nonl nonl)) -(test-ssre "(?<=ab?c|PQR)..." () (: (look-behind (or (: #\a (? #\b) #\c) (: #\P #\Q #\R))) nonl nonl nonl)) -(test-ssre "(?<=PQ|ab?c)..." () (: (look-behind (or (: #\P #\Q) (: #\a (? #\b) #\c))) nonl nonl nonl)) -(test-ssre "(?<=ab?c|PQ)..." () (: (look-behind (or (: #\a (? #\b) #\c) (: #\P #\Q))) nonl nonl nonl)) -(test-ssre "(?<=a(b?c|d?e?e)f)X." () (: (look-behind (: #\a ($ (or (: (? #\b) #\c) (: (? #\d) (? #\e) #\e))) #\f)) #\X nonl)) -(test-ssre "(?= 5 (char-range #\a #\z)) #\b) #\x)) -(test-ssre "[a-z]{1,6}?s|x" () (or (: (**? 
1 6 (char-range #\a #\z)) #\s) #\x)) -(test-ssre "[@]" () #\@) -(test-ssre "@" () #\@) -(test-ssre "@@@xxx" () (: #\@ #\@ #\@ #\x #\x #\x)) -(test-ssre "badutf" () (: #\b #\a #\d #\u #\t #\f)) -(test-ssre "badutf" () (: #\b #\a #\d #\u #\t #\f)) -(test-ssre "shortutf" () (: #\s #\h #\o #\r #\t #\u #\t #\f)) -(test-ssre "anything" () (: #\a #\n #\y #\t #\h #\i #\n #\g)) -(test-ssre "badutf" () (: #\b #\a #\d #\u #\t #\f)) -(test-ssre "(?<=x)badutf" () (: (look-behind #\x) #\b #\a #\d #\u #\t #\f)) -(test-ssre "(?<=xx)badutf" () (: (look-behind (: #\x #\x)) #\b #\a #\d #\u #\t #\f)) -(test-ssre "(?<=xxxx)badutf" () (: (look-behind (: #\x #\x #\x #\x)) #\b #\a #\d #\u #\t #\f)) -(test-ssre "X" () #\X) -(test-ssre "a+" () (+ #\a)) -(test-ssre "A" () #\A) -(test-ssre "x" () #\x) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "X" () #\X) -(test-ssre "(?<=.)X" () (: (look-behind nonl) #\X)) -(test-ssre "a+" () (+ #\a)) -(test-ssre "a" () #\a) -(test-ssre "." () nonl) -(test-ssre "s" () #\s) -(test-ssre "[^s]" () (~ #\s)) -(test-ssre "a(?:.)*?a" () (: #\a (*? nonl) #\a)) -(test-ssre "(?<=pqr)abc(?=xyz)" () (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z)))) -(test-ssre "a\\b" () (: #\a (or bow eow))) -(test-ssre "abc(?=abcde)(?=ab)" () (: #\a #\b #\c (look-ahead (: #\a #\b #\c #\d #\e)) (look-ahead (: #\a #\b)))) -(test-ssre "(?<=abc)123" () (: (look-behind (: #\a #\b #\c)) #\1 #\2 #\3)) -(test-ssre "\\babc\\b" () (: (or bow eow) #\a #\b #\c (or bow eow))) -(test-ssre "(?<=abc)def" () (: (look-behind (: #\a #\b #\c)) #\d #\e #\f)) -(test-ssre "abc(?<=bc)def" () (: #\a #\b #\c (look-behind (: #\b #\c)) #\d #\e #\f)) -(test-ssre "(?<=ab)cdef" () (: (look-behind (: #\a #\b)) #\c #\d #\e #\f)) -(test-ssre "b(?tom|bon)-\\k" () (: (-> A (or (: #\t #\o #\m) (: #\b #\o #\n))) #\- (backref A))) -(test-ssre "Xa{2,4}b" () (: #\X (** 2 4 #\a) #\b)) -(test-ssre "Xa{2,4}?b" () (: #\X (**? 
2 4 #\a) #\b)) -(test-ssre "Xa{2,4}+b" () (: #\X (+ (** 2 4 #\a)) #\b)) -(test-ssre "X\\d{2,4}b" () (: #\X (** 2 4 numeric) #\b)) -(test-ssre "X\\d{2,4}?b" () (: #\X (**? 2 4 numeric) #\b)) -(test-ssre "X\\d{2,4}+b" () (: #\X (+ (** 2 4 numeric)) #\b)) -(test-ssre "X\\D{2,4}b" () (: #\X (** 2 4 (~ numeric)) #\b)) -(test-ssre "X\\D{2,4}?b" () (: #\X (**? 2 4 (~ numeric)) #\b)) -(test-ssre "X\\D{2,4}+b" () (: #\X (+ (** 2 4 (~ numeric))) #\b)) -(test-ssre "X[abc]{2,4}b" () (: #\X (** 2 4 (or #\a #\b #\c)) #\b)) -(test-ssre "X[abc]{2,4}?b" () (: #\X (**? 2 4 (or #\a #\b #\c)) #\b)) -(test-ssre "X[abc]{2,4}+b" () (: #\X (+ (** 2 4 (or #\a #\b #\c))) #\b)) -(test-ssre "X[^a]{2,4}b" () (: #\X (** 2 4 (~ #\a)) #\b)) -(test-ssre "X[^a]{2,4}?b" () (: #\X (**? 2 4 (~ #\a)) #\b)) -(test-ssre "X[^a]{2,4}+b" () (: #\X (+ (** 2 4 (~ #\a))) #\b)) -(test-ssre "Z(?!)" () (: #\Z (neg-look-ahead (:)))) -(test-ssre "dog(sbody)?" () (: #\d #\o #\g (? ($ (: #\s #\b #\o #\d #\y))))) -(test-ssre "dog(sbody)??" () (: #\d #\o #\g (?? ($ (: #\s #\b #\o #\d #\y))))) -(test-ssre "dog|dogsbody" () (or (: #\d #\o #\g) (: #\d #\o #\g #\s #\b #\o #\d #\y))) -(test-ssre "dogsbody|dog" () (or (: #\d #\o #\g #\s #\b #\o #\d #\y) (: #\d #\o #\g))) -(test-ssre "\\bthe cat\\b" () (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow))) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "(?<=abc)123" () (: (look-behind (: #\a #\b #\c)) #\1 #\2 #\3)) -(test-ssre "\\babc\\b" () (: (or bow eow) #\a #\b #\c (or bow eow))) -(test-ssre "a?b?" () (: (? #\a) (? #\b))) -(test-ssre "^a?b?" () (: bos (? #\a) (? 
#\b))) -(test-ssre "abcd*" () (: #\a #\b #\c (* #\d))) -(test-ssre "abc\\d*" () (: #\a #\b #\c (* numeric))) -(test-ssre "abc[de]*" () (: #\a #\b #\c (* (or #\d #\e)))) -(test-ssre "(?<=abc)def" () (: (look-behind (: #\a #\b #\c)) #\d #\e #\f)) -(test-ssre "abc$" () (: #\a #\b #\c eos)) -(test-ssre "abc$" () (: #\a #\b #\c eos)) -(test-ssre "abc\\z" () (: #\a #\b #\c eos)) -(test-ssre "abc\\b" () (: #\a #\b #\c (or bow eow))) -(test-ssre "abc\\B" () (: #\a #\b #\c nwb)) -(test-ssre ".+" () (+ nonl)) -(test-ssre "(?<=(abc)+)X" () (: (look-behind (+ ($ (: #\a #\b #\c)))) #\X)) -(test-ssre "(a)b|ac" () (or (: ($ #\a) #\b) (: #\a #\c))) -(test-ssre "(a)(b)x|abc" () (or (: ($ #\a) ($ #\b) #\x) (: #\a #\b #\c))) -(test-ssre "(?:(foo)|(bar)|(baz))X" () (: (or ($ (: #\f #\o #\o)) ($ (: #\b #\a #\r)) ($ (: #\b #\a #\z))) #\X)) -(test-ssre "(ab)x|ab" () (or (: ($ (: #\a #\b)) #\x) (: #\a #\b))) -(test-ssre "(((((a)))))" () ($ ($ ($ ($ ($ #\a)))))) -(test-ssre "a*?b*?" () (: (*? #\a) (*? #\b))) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "a(b)c" () (: #\a ($ #\b) #\c)) -(test-ssre "(a)(b)|(c)" () (or (: ($ #\a) ($ #\b)) ($ #\c))) -(test-ssre "(?a)|(?b)" () (or (-> A #\a) (-> A #\b))) -(test-ssre "a(b)c(d)" () (: #\a ($ #\b) #\c ($ #\d))) -(test-ssre "^abc" () (: bos #\a #\b #\c)) -(test-ssre ".*\\d" () (: (* nonl) numeric)) -(test-ssre "(abc)*" () (* ($ (: #\a #\b #\c)))) -(test-ssre "^" () bos) -(test-ssre "(?:ab)?(?:ab)(?:ab)" () (: (? 
(: #\a #\b)) (: #\a #\b) (: #\a #\b))) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "(abcd)" () ($ (: #\a #\b #\c #\d))) -(test-ssre "abcd" () (: #\a #\b #\c #\d)) -(test-ssre "a(b)c" () (: #\a ($ #\b) #\c)) -(test-ssre "a[[:punct:]b]" () (: #\a (or punct #\b))) -(test-ssre "a[b[:punct:]]" () (: #\a (or #\b punct))) -(test-ssre "0b 28 3f 2d 78 29 3a" () (: #\0 #\b #\space #\2 #\8 #\space #\3 #\f #\space #\2 #\d #\space #\7 #\8 #\space #\2 #\9 #\space #\3 #\a)) -(test-ssre "a|(b)c" () (or #\a (: ($ #\b) #\c))) -(test-ssre "efg" () (: #\e #\f #\g)) -(test-ssre "eff" () (: #\e #\f #\f)) -(test-ssre "effg" () (: #\e #\f #\f #\g)) -(test-ssre "aaa" () (: #\a #\a #\a)) -(test-ssre "(?)" () (: #\[ ($ (:)) (= 65535 #\]) (-> A (:)))) -(test-ssre "(?<=(?=.(?<=x)))" () (look-behind (look-ahead (: nonl (look-behind #\x))))) -(test-ssre "\\z" () eos) -(test-ssre "\\Z" () (: (? #\newline) eos)) -(test-ssre "(?![ab]).*" () (: (neg-look-ahead (or #\a #\b)) (* nonl))) -(test-ssre "abcd" () (: #\a #\b #\c #\d)) -(test-ssre "12345(?<=\\d{1,256})X" () (: #\1 #\2 #\3 #\4 #\5 (look-behind (** 1 256 numeric)) #\X)) -(test-ssre "(?foo)|(?bar))\\k" () (: (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n))) -(test-ssre "a?b[]xy]*c" () (: (? #\a) #\b (* (or #\] #\x #\y)) #\c)) -(test-ssre "f*" () (* #\f)) -(test-ssre "foo\\*" () (: #\f #\o #\o #\*)) -(test-ssre "foo\\*bar" () (: #\f #\o #\o #\* #\b #\a #\r)) -(test-ssre "f\\\\oo" () (: #\f #\\ #\o #\o)) -(test-ssre "[ten]" () (or #\t #\e #\n)) -(test-ssre "t[a-g]n" () (: #\t (char-range #\a #\g) #\n)) -(test-ssre "a[]]b" () (: #\a #\] #\b)) -(test-ssre "a[]a-]b" () (: #\a (or #\] #\a #\-) #\b)) -(test-ssre "a[]-]b" () (: #\a (or #\] #\-) #\b)) -(test-ssre "a[]a-z]b" () (: #\a (or #\] (char-range #\a #\z)) #\b)) -(test-ssre "\\]" () #\]) -(test-ssre "t[!a-g]n" () (: #\t (or #\! 
(char-range #\a #\g)) #\n)) -(test-ssre "A[+-0]B" () (: #\A (char-range #\+ #\0) #\B)) -(test-ssre "a[--0]z" () (: #\a (char-range #\- #\0) #\z)) -(test-ssre "a[[:digit:].]z" () (: #\a (or numeric #\.) #\z)) -(test-ssre "A\\B\\\\C\\D" () (: #\A nwb #\\ #\C (~ numeric))) -(test-ssre "a*b" () (: (* #\a) #\b)) -(test-ssre "<[]bc]>" () (: #\< (or #\] #\b #\c) #\>)) -(test-ssre "<[^]bc]>" () (: #\< (~ (or #\] #\b #\c)) #\>)) -(test-ssre "a*b+c\\+[def](ab)\\(cd\\)" () (: (* #\a) (+ #\b) #\c #\+ (or #\d #\e #\f) ($ (: #\a #\b)) #\( #\c #\d #\))) -(test-ssre "how.to how\\.to" () (: #\h #\o #\w nonl #\t #\o #\space #\h #\o #\w #\. #\t #\o)) -(test-ssre "^how to \\^how to" () (: bos #\h #\o #\w #\space #\t #\o #\space #\^ #\h #\o #\w #\space #\t #\o)) -(test-ssre "^b\\(c^d\\)\\(^e^f\\)" () (: bos #\b #\( #\c bos #\d #\) #\( bos #\e bos #\f #\))) -(test-ssre "\\[()\\]{65535}()" () (: #\[ ($ (:)) (= 65535 #\]) ($ (:)))) -(test-ssre "^A" () (: bos #\A)) -(test-ssre "^\\w+" () (: bos (+ (or alnum #\_)))) -(test-ssre "(.+)\\b(.+)" () (: ($ (+ nonl)) (or bow eow) ($ (+ nonl)))) -(test-ssre "\\W+" () (+ (~ (or alnum #\_)))) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "a.b" () (: #\a nonl #\b)) -(test-ssre "a(.{3})b" () (: #\a ($ (= 3 nonl)) #\b)) -(test-ssre "a(.*?)(.)" () (: #\a ($ (*? nonl)) ($ nonl))) -(test-ssre "a(.*?)(.)" () (: #\a ($ (*? nonl)) ($ nonl))) -(test-ssre "a(.*)(.)" () (: #\a ($ (* nonl)) ($ nonl))) -(test-ssre "a(.*)(.)" () (: #\a ($ (* nonl)) ($ nonl))) -(test-ssre "a(.)(.)" () (: #\a ($ nonl) ($ nonl))) -(test-ssre "a(.)(.)" () (: #\a ($ nonl) ($ nonl))) -(test-ssre "a(.?)(.)" () (: #\a ($ (? nonl)) ($ nonl))) -(test-ssre "a(.?)(.)" () (: #\a ($ (? nonl)) ($ nonl))) -(test-ssre "a(.??)(.)" () (: #\a ($ (?? nonl)) ($ nonl))) -(test-ssre "a(.??)(.)" () (: #\a ($ (?? nonl)) ($ nonl))) -(test-ssre "a(.{3})b" () (: #\a ($ (= 3 nonl)) #\b)) -(test-ssre "a(.{3,})b" () (: #\a ($ (>= 3 nonl)) #\b)) -(test-ssre "a(.{3,}?)b" () (: #\a ($ (**? 
3 #f nonl)) #\b)) -(test-ssre "a(.{3,5})b" () (: #\a ($ (** 3 5 nonl)) #\b)) -(test-ssre "a(.{3,5}?)b" () (: #\a ($ (**? 3 5 nonl)) #\b)) -(test-ssre "(?<=aXb)cd" () (: (look-behind (: #\a #\X #\b)) #\c #\d)) -(test-ssre "(?<=(.))X" () (: (look-behind ($ nonl)) #\X)) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "^[^a]{2}" () (: bos (= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}" () (: bos (>= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}?" () (: bos (**? 2 #f (~ #\a)))) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "^[^a]{2}" () (: bos (= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}" () (: bos (>= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}?" () (: bos (**? 2 #f (~ #\a)))) -(test-ssre "\\D*" () (* (~ numeric))) -(test-ssre "\\D*" () (* (~ numeric))) -(test-ssre "\\D" () (~ numeric)) -(test-ssre ">\\S" () (: #\> (~ space))) -(test-ssre "\\d" () numeric) -(test-ssre "\\s" () space) -(test-ssre "\\D+" () (+ (~ numeric))) -(test-ssre "\\D{2,3}" () (** 2 3 (~ numeric))) -(test-ssre "\\D{2,3}?" () (**? 2 3 (~ numeric))) -(test-ssre "\\d+" () (+ numeric)) -(test-ssre "\\d{2,3}" () (** 2 3 numeric)) -(test-ssre "\\d{2,3}?" () (**? 2 3 numeric)) -(test-ssre "\\S+" () (+ (~ space))) -(test-ssre "\\S{2,3}" () (** 2 3 (~ space))) -(test-ssre "\\S{2,3}?" () (**? 2 3 (~ space))) -(test-ssre ">\\s+<" () (: #\> (+ space) #\<)) -(test-ssre ">\\s{2,3}<" () (: #\> (** 2 3 space) #\<)) -(test-ssre ">\\s{2,3}?<" () (: #\> (**? 2 3 space) #\<)) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\w{2,3}" () (** 2 3 (or alnum #\_))) -(test-ssre "\\w{2,3}?" () (**? 2 3 (or alnum #\_))) -(test-ssre "\\W+" () (+ (~ (or alnum #\_)))) -(test-ssre "\\W{2,3}" () (** 2 3 (~ (or alnum #\_)))) -(test-ssre "\\W{2,3}?" () (**? 
2 3 (~ (or alnum #\_)))) -(test-ssre "^[ac]*b" () (: bos (* (or #\a #\c)) #\b)) -(test-ssre "^[^x]*b" () (: bos (* (~ #\x)) #\b)) -(test-ssre "^[^x]*b" () (: bos (* (~ #\x)) #\b)) -(test-ssre "^\\d*b" () (: bos (* numeric) #\b)) -(test-ssre "(|a)" () ($ (or (:) #\a))) -(test-ssre "\\S\\S" () (: (~ space) (~ space))) -(test-ssre "\\S{2}" () (= 2 (~ space))) -(test-ssre "\\W\\W" () (: (~ (or alnum #\_)) (~ (or alnum #\_)))) -(test-ssre "\\W{2}" () (= 2 (~ (or alnum #\_)))) -(test-ssre "\\S" () (~ space)) -(test-ssre "\\D" () (~ numeric)) -(test-ssre "\\W" () (~ (or alnum #\_))) -(test-ssre ".[^\\S\n]." () (: nonl (~ (or (~ space) #\newline)) nonl)) -(test-ssre "^[^d]*?$" () (: bos (*? (~ #\d)) eos)) -(test-ssre "^[^d]*?$" () (: bos (*? (~ #\d)) eos)) -(test-ssre "^[^d]*?$" () (: bos (*? (~ #\d)) eos)) -(test-ssre "A*" () (* #\A)) -(test-ssre "." () nonl) -(test-ssre "^\\d*\\w{4}" () (: bos (* numeric) (= 4 (or alnum #\_)))) -(test-ssre "^[^b]*\\w{4}" () (: bos (* (~ #\b)) (= 4 (or alnum #\_)))) -(test-ssre "^[^b]*\\w{4}" () (: bos (* (~ #\b)) (= 4 (or alnum #\_)))) -(test-ssre "^.\\B.\\B." () (: bos nonl nwb nonl nwb nonl)) -(test-ssre "\\D+" () (+ (~ numeric))) -(test-ssre "^\\w+" () (: bos (+ (or alnum #\_)))) -(test-ssre "^\\d+" () (: bos (+ numeric))) -(test-ssre "^>\\s+" () (: bos #\> (+ space))) -(test-ssre "^A\\s+Z" () (: bos #\A (+ space) #\Z)) -(test-ssre "[RST]+" () (+ (or #\R #\S #\T))) -(test-ssre "[R-T]+" () (+ (char-range #\R #\T))) -(test-ssre "[q-u]+" () (+ (char-range #\q #\u))) -(test-ssre "^s?c" () (: bos (? #\s) #\c)) -(test-ssre "[A-`]" () (char-range #\A #\`)) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\b.+?\\b" () (: (or bow eow) (**? 1 #f nonl) (or bow eow))) -(test-ssre "caf\\B.+?\\B" () (: #\c #\a #\f nwb (**? 
1 #f nonl) nwb)) -(test-ssre "c3 b1" () (: #\c #\3 #\space #\b #\1)) -(test-ssre "^A\\s+Z" () (: bos #\A (+ space) #\Z)) -(test-ssre "\\W" () (~ (or alnum #\_))) -(test-ssre "\\w" () (or alnum #\_)) -(test-ssre "Xa{2,4}b" () (: #\X (** 2 4 #\a) #\b)) -(test-ssre "Xa{2,4}?b" () (: #\X (**? 2 4 #\a) #\b)) -(test-ssre "Xa{2,4}+b" () (: #\X (+ (** 2 4 #\a)) #\b)) -(test-ssre "X\\d{2,4}b" () (: #\X (** 2 4 numeric) #\b)) -(test-ssre "X\\d{2,4}?b" () (: #\X (**? 2 4 numeric) #\b)) -(test-ssre "X\\d{2,4}+b" () (: #\X (+ (** 2 4 numeric)) #\b)) -(test-ssre "X\\D{2,4}b" () (: #\X (** 2 4 (~ numeric)) #\b)) -(test-ssre "X\\D{2,4}?b" () (: #\X (**? 2 4 (~ numeric)) #\b)) -(test-ssre "X\\D{2,4}+b" () (: #\X (+ (** 2 4 (~ numeric))) #\b)) -(test-ssre "X\\D{2,4}b" () (: #\X (** 2 4 (~ numeric)) #\b)) -(test-ssre "X\\D{2,4}?b" () (: #\X (**? 2 4 (~ numeric)) #\b)) -(test-ssre "X\\D{2,4}+b" () (: #\X (+ (** 2 4 (~ numeric))) #\b)) -(test-ssre "X[abc]{2,4}b" () (: #\X (** 2 4 (or #\a #\b #\c)) #\b)) -(test-ssre "X[abc]{2,4}?b" () (: #\X (**? 2 4 (or #\a #\b #\c)) #\b)) -(test-ssre "X[abc]{2,4}+b" () (: #\X (+ (** 2 4 (or #\a #\b #\c))) #\b)) -(test-ssre "X[^a]{2,4}b" () (: #\X (** 2 4 (~ #\a)) #\b)) -(test-ssre "X[^a]{2,4}?b" () (: #\X (**? 2 4 (~ #\a)) #\b)) -(test-ssre "X[^a]{2,4}+b" () (: #\X (+ (** 2 4 (~ #\a))) #\b)) -(test-ssre "X[^a]{2,4}b" () (: #\X (** 2 4 (~ #\a)) #\b)) -(test-ssre "X[^a]{2,4}?b" () (: #\X (**? 
2 4 (~ #\a)) #\b)) -(test-ssre "X[^a]{2,4}+b" () (: #\X (+ (** 2 4 (~ #\a))) #\b)) -(test-ssre "\\bthe cat\\b" () (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow))) -(test-ssre "abcd*" () (: #\a #\b #\c (* #\d))) -(test-ssre "abcd*" () (: #\a #\b #\c (* #\d))) -(test-ssre "abc\\d*" () (: #\a #\b #\c (* numeric))) -(test-ssre "abc[de]*" () (: #\a #\b #\c (* (or #\d #\e)))) -(test-ssre "X\\W{3}X" () (: #\X (= 3 (~ (or alnum #\_))) #\X)) -(test-ssre "f.*" () (: #\f (* nonl))) -(test-ssre "f.*" () (: #\f (* nonl))) -(test-ssre "f.*" () (: #\f (* nonl))) -(test-ssre "f.*" () (: #\f (* nonl))) -(test-ssre "(?ss)|(?kk)) \\k" () (: (or (-> A (: #\s #\s)) (-> A (: #\k #\k))) #\space (backref A))) -(test-ssre "(?:(?s)|(?k)) \\k{3,}!" () (: (or (-> A #\s) (-> A #\k)) #\space (>= 3 (backref A)) #\!)) -(test-ssre "i" () #\i) -(test-ssre "I" () #\I) -(test-ssre "[i]" () #\i) -(test-ssre "[^i]" () (~ #\i)) -(test-ssre "[zi]" () (or #\z #\i)) -(test-ssre "[iI]" () (or #\i #\I)) -(test-ssre "\\d+" () (+ numeric)) -(test-ssre "\\d+" () (+ numeric)) -(test-ssre ">\\s+<" () (: #\> (+ space) #\<)) -(test-ssre ">\\s+<" () (: #\> (+ space) #\<)) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\bABC\\b" () (: (or bow eow) #\A #\B #\C (or bow eow))) -(test-ssre "\\bABC\\b" () (: (or bow eow) #\A #\B #\C (or bow eow))) -(test-ssre "(?= 2 #\b) #\x (** 4 5 #\y) #\p (** 0 6 #\q) #\A (>= 0 #\B) #\z #\z)) -(test-ssre "^(abc){1,2}zz" () (: bos (** 1 2 ($ (: #\a #\b #\c))) #\z #\z)) -(test-ssre "^(b+?|a){1,2}?c" () (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) -(test-ssre "^(b+|a){1,2}c" () (: bos (** 1 2 ($ (or (+ #\b) #\a))) #\c)) -(test-ssre "^(b+|a){1,2}?bc" () (: bos (**? 1 2 ($ (or (+ #\b) #\a))) #\b #\c)) -(test-ssre "^(b*|ba){1,2}?bc" () (: bos (**? 1 2 ($ (or (* #\b) (: #\b #\a)))) #\b #\c)) -(test-ssre "^(ba|b*){1,2}?bc" () (: bos (**? 
1 2 ($ (or (: #\b #\a) (* #\b)))) #\b #\c)) -(test-ssre "^[ab\\]cde]" () (: bos (or #\a #\b #\] #\c #\d #\e))) -(test-ssre "^[]cde]" () (: bos (or #\] #\c #\d #\e))) -(test-ssre "^[^ab\\]cde]" () (: bos (~ (or #\a #\b #\] #\c #\d #\e)))) -(test-ssre "^[^]cde]" () (: bos (~ (or #\] #\c #\d #\e)))) -(test-ssre "^@" () (: bos #\@)) -(test-ssre "^[0-9]+$" () (: bos (+ (char-range #\0 #\9)) eos)) -(test-ssre "^.*nter" () (: bos (* nonl) #\n #\t #\e #\r)) -(test-ssre "^xxx[0-9]+$" () (: bos #\x #\x #\x (+ (char-range #\0 #\9)) eos)) -(test-ssre "^.+[0-9][0-9][0-9]$" () (: bos (+ nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) -(test-ssre "^.+?[0-9][0-9][0-9]$" () (: bos (**? 1 #f nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) -(test-ssre "^([^!]+)!(.+)=apquxz\\.ixr\\.zzz\\.ac\\.uk$" () (: bos ($ (+ (~ #\!))) #\! ($ (+ nonl)) #\= #\a #\p #\q #\u #\x #\z #\. #\i #\x #\r #\. #\z #\z #\z #\. #\a #\c #\. #\u #\k eos)) -(test-ssre ":" () #\:) -(test-ssre "^.*\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$" () (: bos (* nonl) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) eos)) -(test-ssre "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$" () (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos)) -(test-ssre "^[a-zA-Z\\d][a-zA-Z\\d\\-]*(\\.[a-zA-Z\\d][a-zA-Z\\d\\-]*)*\\.$" () (: bos (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-)) (* ($ (: #\. (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-))))) #\. eos)) -(test-ssre "^\\*\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?(\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?)*$" () (: bos #\* #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric))))) (* ($ (: #\. (char-range #\a #\z) (? 
($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric)))))))) eos)) -(test-ssre "^(?=ab(de))(abd)(e)" () (: bos (look-ahead (: #\a #\b ($ (: #\d #\e)))) ($ (: #\a #\b #\d)) ($ #\e))) -(test-ssre "^(?!(ab)de|x)(abd)(f)" () (: bos (neg-look-ahead (or (: ($ (: #\a #\b)) #\d #\e) #\x)) ($ (: #\a #\b #\d)) ($ #\f))) -(test-ssre "^(?=(ab(cd)))(ab)" () (: bos (look-ahead ($ (: #\a #\b ($ (: #\c #\d))))) ($ (: #\a #\b)))) -(test-ssre "^$" () (: bos eos)) -(test-ssre "^ a\\ b[c ]d $" (x) (: bos #\a #\space #\b (or #\c #\space) #\d eos)) -(test-ssre "^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$" () (: bos ($ (: #\a ($ (: #\b ($ #\c))))) ($ (: #\d ($ (: #\e ($ #\f))))) ($ (: #\h ($ (: #\i ($ #\j))))) ($ (: #\k ($ (: #\l ($ #\m))))) eos)) -(test-ssre "^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$" () (: bos (: #\a ($ (: #\b ($ #\c)))) (: #\d ($ (: #\e ($ #\f)))) (: #\h ($ (: #\i ($ #\j)))) (: #\k ($ (: #\l ($ #\m)))) eos)) -(test-ssre "^[.^$|()*+?{,}]+" () (: bos (+ (or #\. #\^ #\$ #\| #\( #\) #\* #\+ #\? #\{ #\, #\})))) -(test-ssre "^a*\\w" () (: bos (* #\a) (or alnum #\_))) -(test-ssre "^a*?\\w" () (: bos (*? #\a) (or alnum #\_))) -(test-ssre "^a+\\w" () (: bos (+ #\a) (or alnum #\_))) -(test-ssre "^a+?\\w" () (: bos (**? 1 #f #\a) (or alnum #\_))) -(test-ssre "^\\d{8}\\w{2,}" () (: bos (= 8 numeric) (>= 2 (or alnum #\_)))) -(test-ssre "^[aeiou\\d]{4,5}$" () (: bos (** 4 5 (or #\a #\e #\i #\o #\u numeric)) eos)) -(test-ssre "^[aeiou\\d]{4,5}?" () (: bos (**? 
4 5 (or #\a #\e #\i #\o #\u numeric)))) -(test-ssre "^12.34" () (: bos #\1 #\2 nonl #\3 #\4)) -(test-ssre "foo(?!bar)(.*)" () (: #\f #\o #\o (neg-look-ahead (: #\b #\a #\r)) ($ (* nonl)))) -(test-ssre "(?:(?!foo)...|^.{0,2})bar(.*)" () (: (or (: (neg-look-ahead (: #\f #\o #\o)) nonl nonl nonl) (: bos (** 0 2 nonl))) #\b #\a #\r ($ (* nonl)))) -(test-ssre "^(\\D*)(?=\\d)(?!123)" () (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) -(test-ssre "(?!^)abc" () (: (neg-look-ahead bos) #\a #\b #\c)) -(test-ssre "(?=^)abc" () (: (look-ahead bos) #\a #\b #\c)) -(test-ssre "ab{1,3}bc" () (: #\a (** 1 3 #\b) #\b #\c)) -(test-ssre "([^.]*)\\.([^:]*):[T ]+(.*)" () (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* nonl)))) -(test-ssre "^[W-c]+$" () (: bos (+ (char-range #\W #\c)) eos)) -(test-ssre "^abc$" () (: bos #\a #\b #\c eos)) -(test-ssre "^abc$" () (: bos #\a #\b #\c eos)) -(test-ssre "\\Aabc\\Z" () (: bos #\a #\b #\c (: (? #\newline) eos))) -(test-ssre "\\A(.)*\\Z" () (: bos (* ($ nonl)) (: (? #\newline) eos))) -(test-ssre "(?:b)|(?::+)" () (or #\b (+ #\:))) -(test-ssre "[-az]+" () (+ (or #\- #\a #\z))) -(test-ssre "[az-]+" () (+ (or #\a #\z #\-))) -(test-ssre "[a\\-z]+" () (+ (or #\a #\- #\z))) -(test-ssre "[a-z]+" () (+ (char-range #\a #\z))) -(test-ssre "[\\d-]+" () (+ (or numeric #\-))) -(test-ssre "abc$" () (: #\a #\b #\c eos)) -(test-ssre "a{0}bc" () (: (= 0 #\a) #\b #\c)) -(test-ssre "(a|(bc)){0,0}?xyz" () (: (**? 0 0 ($ (or #\a ($ (: #\b #\c))))) #\x #\y #\z)) -(test-ssre "[^a]" () (~ #\a)) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "[^k]$" () (: (~ #\k) eos)) -(test-ssre "[^k]{2,3}$" () (: (** 2 3 (~ #\k)) eos)) -(test-ssre "^\\d{8,}@.+[^k]$" () (: bos (>= 8 numeric) #\@ (+ nonl) (~ #\k) eos)) -(test-ssre "[^a]" () (~ #\a)) -(test-ssre "[^az]" () (~ (or #\a #\z))) -(test-ssre "P[^*]TAIRE[^*]{1,6}?LL" () (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 
1 6 (~ #\*)) #\L #\L)) -(test-ssre "P[^*]TAIRE[^*]{1,}?LL" () (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 #f (~ #\*)) #\L #\L)) -(test-ssre "(\\.\\d\\d[1-9]?)\\d+" () (: ($ (: #\. numeric numeric (? (char-range #\1 #\9)))) (+ numeric))) -(test-ssre "(\\.\\d\\d((?=0)|\\d(?=\\d)))" () ($ (: #\. numeric numeric ($ (or (look-ahead #\0) (: numeric (look-ahead numeric))))))) -(test-ssre "foo(.*)bar" () (: #\f #\o #\o ($ (* nonl)) #\b #\a #\r)) -(test-ssre "foo(.*?)bar" () (: #\f #\o #\o ($ (*? nonl)) #\b #\a #\r)) -(test-ssre "(.*)(\\d+)" () (: ($ (* nonl)) ($ (+ numeric)))) -(test-ssre "(.*?)(\\d+)" () (: ($ (*? nonl)) ($ (+ numeric)))) -(test-ssre "(.*)(\\d+)$" () (: ($ (* nonl)) ($ (+ numeric)) eos)) -(test-ssre "(.*?)(\\d+)$" () (: ($ (*? nonl)) ($ (+ numeric)) eos)) -(test-ssre "(.*)\\b(\\d+)$" () (: ($ (* nonl)) (or bow eow) ($ (+ numeric)) eos)) -(test-ssre "(.*\\D)(\\d+)$" () (: ($ (: (* nonl) (~ numeric))) ($ (+ numeric)) eos)) -(test-ssre "^\\D*(?!123)" () (: bos (* (~ numeric)) (neg-look-ahead (: #\1 #\2 #\3)))) -(test-ssre "^(\\D*)(?=\\d)(?!123)" () (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) -(test-ssre "^[W-\\]46]" () (: bos (or (char-range #\W #\]) #\4 #\6))) -(test-ssre "word (?:[a-zA-Z0-9]+ ){0,10}otherword" () (: #\w #\o #\r #\d #\space (** 0 10 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) -(test-ssre "word (?:[a-zA-Z0-9]+ ){0,300}otherword" () (: #\w #\o #\r #\d #\space (** 0 300 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) -(test-ssre "^(a){0,0}" () (: bos (= 0 ($ #\a)))) -(test-ssre "^(a){0,1}" () (: bos (** 0 1 ($ #\a)))) -(test-ssre "^(a){0,2}" () (: bos (** 0 2 ($ #\a)))) -(test-ssre "^(a){0,3}" () (: bos (** 0 3 ($ #\a)))) -(test-ssre "^(a){0,}" () (: bos (>= 0 ($ #\a)))) -(test-ssre "^(a){1,1}" () (: bos (= 1 ($ #\a)))) -(test-ssre "^(a){1,2}" () (: bos 
(** 1 2 ($ #\a)))) -(test-ssre "^(a){1,3}" () (: bos (** 1 3 ($ #\a)))) -(test-ssre "^(a){1,}" () (: bos (>= 1 ($ #\a)))) -(test-ssre ".*\\.gif" () (: (* nonl) #\. #\g #\i #\f)) -(test-ssre ".{0,}\\.gif" () (: (>= 0 nonl) #\. #\g #\i #\f)) -(test-ssre ".*\\.gif" () (: (* nonl) #\. #\g #\i #\f)) -(test-ssre ".*\\.gif" () (: (* nonl) #\. #\g #\i #\f)) -(test-ssre ".*\\.gif" () (: (* nonl) #\. #\g #\i #\f)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre "(.*X|^B)" () ($ (or (: (* nonl) #\X) (: bos #\B)))) -(test-ssre "(.*X|^B)" () ($ (or (: (* nonl) #\X) (: bos #\B)))) -(test-ssre "(.*X|^B)" () ($ (or (: (* nonl) #\X) (: bos #\B)))) -(test-ssre "(.*X|^B)" () ($ (or (: (* nonl) #\X) (: bos #\B)))) -(test-ssre "^.*B" () (: bos (* nonl) #\B)) -(test-ssre "(?m)^.*B" () (: bol (* nonl) #\B)) -(test-ssre "^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" () (: bos (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9))) -(test-ssre "^\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d" () (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) -(test-ssre "^[\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d]" () (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) -(test-ssre "^[abc]{12}" () (: bos (= 12 (or #\a #\b #\c)))) -(test-ssre "^[a-c]{12}" () (: bos (= 12 (char-range #\a #\c)))) -(test-ssre "^(a|b|c){12}" () (: bos (= 12 ($ (or #\a #\b #\c))))) -(test-ssre 
"^[abcdefghijklmnopqrstuvwxy0123456789]" () (: bos (or #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))) -(test-ssre "abcde{0,0}" () (: #\a #\b #\c #\d (= 0 #\e))) -(test-ssre "ab[cd]{0,0}e" () (: #\a #\b (= 0 (or #\c #\d)) #\e)) -(test-ssre "ab(c){0,0}d" () (: #\a #\b (= 0 ($ #\c)) #\d)) -(test-ssre "a(b*)" () (: #\a ($ (* #\b)))) -(test-ssre "ab\\d{0}e" () (: #\a #\b (= 0 numeric) #\e)) -(test-ssre "\"([^\\\\\"]+|\\\\.)*\"" () (: #\" (* ($ (or (+ (~ (or #\\ #\"))) (: #\\ nonl)))) #\")) -(test-ssre ".*?" () (*? nonl)) -(test-ssre "\\b" () (or bow eow)) -(test-ssre "\\b" () (or bow eow)) -(test-ssre "a[^a]b" () (: #\a (~ #\a) #\b)) -(test-ssre "a.b" () (: #\a nonl #\b)) -(test-ssre "a[^a]b" () (: #\a (~ #\a) #\b)) -(test-ssre "a.b" () (: #\a nonl #\b)) -(test-ssre "^(b+?|a){1,2}?c" () (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) -(test-ssre "^(b+|a){1,2}?c" () (: bos (**? 1 2 ($ (or (+ #\b) #\a))) #\c)) -(test-ssre "(?!\\A)x" () (: (neg-look-ahead bos) #\x)) -(test-ssre "(A|B)*CD" () (: (* ($ (or #\A #\B))) #\C #\D)) -(test-ssre "(?= 0 #\b) #\b #\c)) -(test-ssre "ab+bc" () (: #\a (+ #\b) #\b #\c)) -(test-ssre "ab+bc" () (: #\a (+ #\b) #\b #\c)) -(test-ssre "ab{1,}bc" () (: #\a (>= 1 #\b) #\b #\c)) -(test-ssre "ab{1,3}bc" () (: #\a (** 1 3 #\b) #\b #\c)) -(test-ssre "ab{3,4}bc" () (: #\a (** 3 4 #\b) #\b #\c)) -(test-ssre "ab{4,5}bc" () (: #\a (** 4 5 #\b) #\b #\c)) -(test-ssre "ab?bc" () (: #\a (? #\b) #\b #\c)) -(test-ssre "ab{0,1}bc" () (: #\a (** 0 1 #\b) #\b #\c)) -(test-ssre "ab?bc" () (: #\a (? #\b) #\b #\c)) -(test-ssre "ab?c" () (: #\a (? 
#\b) #\c)) -(test-ssre "ab{0,1}c" () (: #\a (** 0 1 #\b) #\c)) -(test-ssre "^abc$" () (: bos #\a #\b #\c eos)) -(test-ssre "^abc" () (: bos #\a #\b #\c)) -(test-ssre "^abc$" () (: bos #\a #\b #\c eos)) -(test-ssre "abc$" () (: #\a #\b #\c eos)) -(test-ssre "^" () bos) -(test-ssre "$" () eos) -(test-ssre "a.c" () (: #\a nonl #\c)) -(test-ssre "a.*c" () (: #\a (* nonl) #\c)) -(test-ssre "a[bc]d" () (: #\a (or #\b #\c) #\d)) -(test-ssre "a[b-d]e" () (: #\a (char-range #\b #\d) #\e)) -(test-ssre "a[b-d]" () (: #\a (char-range #\b #\d))) -(test-ssre "a[-b]" () (: #\a (or #\- #\b))) -(test-ssre "a[b-]" () (: #\a (or #\b #\-))) -(test-ssre "a[]]b" () (: #\a #\] #\b)) -(test-ssre "a[^bc]d" () (: #\a (~ (or #\b #\c)) #\d)) -(test-ssre "a[^-b]c" () (: #\a (~ (or #\- #\b)) #\c)) -(test-ssre "a[^]b]c" () (: #\a (~ (or #\] #\b)) #\c)) -(test-ssre "\\ba\\b" () (: (or bow eow) #\a (or bow eow))) -(test-ssre "\\by\\b" () (: (or bow eow) #\y (or bow eow))) -(test-ssre "\\Ba\\B" () (: nwb #\a nwb)) -(test-ssre "\\By\\b" () (: nwb #\y (or bow eow))) -(test-ssre "\\by\\B" () (: (or bow eow) #\y nwb)) -(test-ssre "\\By\\B" () (: nwb #\y nwb)) -(test-ssre "\\w" () (or alnum #\_)) -(test-ssre "\\W" () (~ (or alnum #\_))) -(test-ssre "a\\sb" () (: #\a space #\b)) -(test-ssre "a\\Sb" () (: #\a (~ space) #\b)) -(test-ssre "\\d" () numeric) -(test-ssre "\\D" () (~ numeric)) -(test-ssre "ab|cd" () (or (: #\a #\b) (: #\c #\d))) -(test-ssre "()ef" () (: ($ (:)) #\e #\f)) -(test-ssre "$b" () (: eos #\b)) -(test-ssre "a\\(b" () (: #\a #\( #\b)) -(test-ssre "a\\(*b" () (: #\a (* #\() #\b)) -(test-ssre "a\\\\b" () (: #\a #\\ #\b)) -(test-ssre "((a))" () ($ ($ #\a))) -(test-ssre "(a)b(c)" () (: ($ #\a) #\b ($ #\c))) -(test-ssre "a+b+c" () (: (+ #\a) (+ #\b) #\c)) -(test-ssre "a{1,}b{1,}c" () (: (>= 1 #\a) (>= 1 #\b) #\c)) -(test-ssre "a.+?c" () (: #\a (**? 
1 #f nonl) #\c)) -(test-ssre "(a+|b)*" () (* ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b){0,}" () (>= 0 ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b)+" () (+ ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b){1,}" () (>= 1 ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b)?" () (? ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b){0,1}" () (** 0 1 ($ (or (+ #\a) #\b)))) -(test-ssre "[^ab]*" () (* (~ (or #\a #\b)))) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "a*" () (* #\a)) -(test-ssre "([abc])*d" () (: (* ($ (or #\a #\b #\c))) #\d)) -(test-ssre "([abc])*bcd" () (: (* ($ (or #\a #\b #\c))) #\b #\c #\d)) -(test-ssre "a|b|c|d|e" () (or #\a #\b #\c #\d #\e)) -(test-ssre "(a|b|c|d|e)f" () (: ($ (or #\a #\b #\c #\d #\e)) #\f)) -(test-ssre "abcd*efg" () (: #\a #\b #\c (* #\d) #\e #\f #\g)) -(test-ssre "ab*" () (: #\a (* #\b))) -(test-ssre "(ab|cd)e" () (: ($ (or (: #\a #\b) (: #\c #\d))) #\e)) -(test-ssre "[abhgefdc]ij" () (: (or #\a #\b #\h #\g #\e #\f #\d #\c) #\i #\j)) -(test-ssre "^(ab|cd)e" () (: bos ($ (or (: #\a #\b) (: #\c #\d))) #\e)) -(test-ssre "(abc|)ef" () (: ($ (or (: #\a #\b #\c) (:))) #\e #\f)) -(test-ssre "(a|b)c*d" () (: ($ (or #\a #\b)) (* #\c) #\d)) -(test-ssre "(ab|ab*)bc" () (: ($ (or (: #\a #\b) (: #\a (* #\b)))) #\b #\c)) -(test-ssre "a([bc]*)c*" () (: #\a ($ (* (or #\b #\c))) (* #\c))) -(test-ssre "a([bc]*)(c*d)" () (: #\a ($ (* (or #\b #\c))) ($ (: (* #\c) #\d)))) -(test-ssre "a([bc]+)(c*d)" () (: #\a ($ (+ (or #\b #\c))) ($ (: (* #\c) #\d)))) -(test-ssre "a([bc]*)(c+d)" () (: #\a ($ (* (or #\b #\c))) ($ (: (+ #\c) #\d)))) -(test-ssre "a[bcd]*dcdcde" () (: #\a (* (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) -(test-ssre "a[bcd]+dcdcde" () (: #\a (+ (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) -(test-ssre "(ab|a)b*c" () (: ($ (or (: #\a #\b) #\a)) (* #\b) #\c)) -(test-ssre "((a)(b)c)(d)" () (: ($ (: ($ #\a) ($ #\b) #\c)) ($ #\d))) -(test-ssre "[a-zA-Z_][a-zA-Z0-9_]*" () (: (or (char-range #\a #\z) (char-range #\A #\Z) #\_) (* (or (char-range #\a #\z) (char-range #\A #\Z) 
(char-range #\0 #\9) #\_)))) -(test-ssre "^a(bc+|b[eh])g|.h$" () (or (: bos #\a ($ (or (: #\b (+ #\c)) (: #\b (or #\e #\h)))) #\g) (: nonl #\h eos))) -(test-ssre "(bc+d$|ef*g.|h?i(j|k))" () ($ (or (: #\b (+ #\c) #\d eos) (: #\e (* #\f) #\g nonl) (: (? #\h) #\i ($ (or #\j #\k)))))) -(test-ssre "((((((((((a))))))))))" () ($ ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a))))))))))) -(test-ssre "(((((((((a)))))))))" () ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a)))))))))) -(test-ssre "multiple words of text" () (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s #\space #\o #\f #\space #\t #\e #\x #\t)) -(test-ssre "multiple words" () (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s)) -(test-ssre "(.*)c(.*)" () (: ($ (* nonl)) #\c ($ (* nonl)))) -(test-ssre "\\((.*), (.*)\\)" () (: #\( ($ (* nonl)) #\, #\space ($ (* nonl)) #\))) -(test-ssre "[k]" () #\k) -(test-ssre "abcd" () (: #\a #\b #\c #\d)) -(test-ssre "a(bc)d" () (: #\a ($ (: #\b #\c)) #\d)) -(test-ssre "a[-]?c" () (: #\a (? #\-) #\c)) -(test-ssre "a(?!b)." () (: #\a (neg-look-ahead #\b) nonl)) -(test-ssre "a(?=d)." () (: #\a (look-ahead #\d) nonl)) -(test-ssre "a(?=c|d)." () (: #\a (look-ahead (or #\c #\d)) nonl)) -(test-ssre "a(?:b|c|d)(.)" () (: #\a (or #\b #\c #\d) ($ nonl))) -(test-ssre "a(?:b|c|d)*(.)" () (: #\a (* (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d)+?(.)" () (: #\a (**? 1 #f (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d)+(.)" () (: #\a (+ (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){2}(.)" () (: #\a (= 2 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){4,5}(.)" () (: #\a (** 4 5 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){4,5}?(.)" () (: #\a (**? 4 5 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "((foo)|(bar))*" () (* ($ (or ($ (: #\f #\o #\o)) ($ (: #\b #\a #\r)))))) -(test-ssre "a(?:b|c|d){6,7}(.)" () (: #\a (** 6 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){6,7}?(.)" () (: #\a (**? 
6 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,6}(.)" () (: #\a (** 5 6 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,6}?(.)" () (: #\a (**? 5 6 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,7}(.)" () (: #\a (** 5 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,7}?(.)" () (: #\a (**? 5 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|(c|e){1,2}?|d)+?(.)" () (: #\a (**? 1 #f (or #\b (**? 1 2 ($ (or #\c #\e))) #\d)) ($ nonl))) -(test-ssre "^(.+)?B" () (: bos (? ($ (+ nonl))) #\B)) -(test-ssre "^([^a-z])|(\\^)$" () (or (: bos ($ (~ (char-range #\a #\z)))) (: ($ #\^) eos))) -(test-ssre "^[<>]&" () (: bos (or #\< #\>) #\&)) -(test-ssre "(?:(f)(o)(o)|(b)(a)(r))*" () (* (or (: ($ #\f) ($ #\o) ($ #\o)) (: ($ #\b) ($ #\a) ($ #\r))))) -(test-ssre "(?<=a)b" () (: (look-behind #\a) #\b)) -(test-ssre "(?a+)ab" () (: ($ (: #\> (+ #\a))) #\a #\b)) -(test-ssre "a\\z" () (: #\a eos)) -(test-ssre "(?<=\\d{3}(?!999))foo" () (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)))) #\f #\o #\o)) -(test-ssre "(?<=(?!...999)\\d{3})foo" () (: (look-behind (: (neg-look-ahead (: nonl nonl nonl #\9 #\9 #\9)) (= 3 numeric))) #\f #\o #\o)) -(test-ssre "(?<=\\d{3}(?!999)...)foo" () (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)) nonl nonl nonl)) #\f #\o #\o)) -(test-ssre "(?<=\\d{3}...)(?= 2 (or #\a #\b))) -(test-ssre "[ab]{2,}?" () (**? 2 #f (or #\a #\b))) -(test-ssre "abc(?=xyz)" () (: #\a #\b #\c (look-ahead (: #\x #\y #\z)))) -(test-ssre "(?<=pqr)abc(?=xyz)" () (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z)))) -(test-ssre "a\\b" () (: #\a (or bow eow))) -(test-ssre "abc(?=abcde)(?=ab)" () (: #\a #\b #\c (look-ahead (: #\a #\b #\c #\d #\e)) (look-ahead (: #\a #\b)))) -(test-ssre "a*?b*?" () (: (*? #\a) (*? 
#\b))) -(test-ssre "(a)(b)|(c)" () (or (: ($ #\a) ($ #\b)) ($ #\c))) -(test-ssre "(?aa)" () (-> A (: #\a #\a))) -(test-ssre "a(b)c(d)" () (: #\a ($ #\b) #\c ($ #\d))) -(test-ssre "^" () bos) -(test-ssre "(02-)?[0-9]{3}-[0-9]{3}" () (: (? ($ (: #\0 #\2 #\-))) (= 3 (char-range #\0 #\9)) #\- (= 3 (char-range #\0 #\9)))) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "abc|bcd" () (or (: #\a #\b #\c) (: #\b #\c #\d))) -(test-ssre "(?<=abc|)" () (look-behind (or (: #\a #\b #\c) (:)))) -(test-ssre "(?<=abc|)" () (look-behind (or (: #\a #\b #\c) (:)))) -(test-ssre "(?<=|abc)" () (look-behind (or (:) (: #\a #\b #\c)))) -(test-ssre "[abc]" () (or #\a #\b #\c)) -(test-ssre "foobar" () (: #\f #\o #\o #\b #\a #\r)) -(test-ssre "foobar" () (: #\f #\o #\o #\b #\a #\r)) -(test-ssre "(?<=pqr)abc(?=xyz)" () (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z)))) -(test-ssre "\\z" () eos) -(test-ssre "\\Z" () (: (? #\newline) eos)) -(test-ssre "(?<=(?=.(?<=x)))" () (look-behind (look-ahead (: nonl (look-behind #\x))))) -(test-ssre "(?![ab]).*" () (: (neg-look-ahead (or #\a #\b)) (* nonl))) -(test-ssre "[a[]" () (or #\a #\[)) -(test-ssre "\\bX" () (: (or bow eow) #\X)) -(test-ssre "\\BX" () (: nwb #\X)) -(test-ssre "X\\b" () (: #\X (or bow eow))) -(test-ssre "X\\B" () (: #\X nwb)) -(test-ssre "[^a]" () (~ #\a)) -(test-ssre "a.b" () (: #\a nonl #\b)) -(test-ssre "a(.{3})b" () (: #\a ($ (= 3 nonl)) #\b)) -(test-ssre "a(.*?)(.)" () (: #\a ($ (*? nonl)) ($ nonl))) -(test-ssre "a(.*?)(.)" () (: #\a ($ (*? nonl)) ($ nonl))) -(test-ssre "a(.*)(.)" () (: #\a ($ (* nonl)) ($ nonl))) -(test-ssre "a(.*)(.)" () (: #\a ($ (* nonl)) ($ nonl))) -(test-ssre "a(.)(.)" () (: #\a ($ nonl) ($ nonl))) -(test-ssre "a(.)(.)" () (: #\a ($ nonl) ($ nonl))) -(test-ssre "a(.?)(.)" () (: #\a ($ (? nonl)) ($ nonl))) -(test-ssre "a(.?)(.)" () (: #\a ($ (? nonl)) ($ nonl))) -(test-ssre "a(.??)(.)" () (: #\a ($ (?? 
nonl)) ($ nonl))) -(test-ssre "a(.??)(.)" () (: #\a ($ (?? nonl)) ($ nonl))) -(test-ssre "a(.{3})b" () (: #\a ($ (= 3 nonl)) #\b)) -(test-ssre "a(.{3,})b" () (: #\a ($ (>= 3 nonl)) #\b)) -(test-ssre "a(.{3,}?)b" () (: #\a ($ (**? 3 #f nonl)) #\b)) -(test-ssre "a(.{3,5})b" () (: #\a ($ (** 3 5 nonl)) #\b)) -(test-ssre "a(.{3,5}?)b" () (: #\a ($ (**? 3 5 nonl)) #\b)) -(test-ssre "(?<=aXb)cd" () (: (look-behind (: #\a #\X #\b)) #\c #\d)) -(test-ssre "(?<=(.))X" () (: (look-behind ($ nonl)) #\X)) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "^[^a]{2}" () (: bos (= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}" () (: bos (>= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}?" () (: bos (**? 2 #f (~ #\a)))) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "^[^a]{2}" () (: bos (= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}" () (: bos (>= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}?" () (: bos (**? 2 #f (~ #\a)))) -(test-ssre "\\D" () (~ numeric)) -(test-ssre ">\\S" () (: #\> (~ space))) -(test-ssre "\\d" () numeric) -(test-ssre "\\s" () space) -(test-ssre "\\D+" () (+ (~ numeric))) -(test-ssre "\\D{2,3}" () (** 2 3 (~ numeric))) -(test-ssre "\\D{2,3}?" () (**? 2 3 (~ numeric))) -(test-ssre "\\d+" () (+ numeric)) -(test-ssre "\\d{2,3}" () (** 2 3 numeric)) -(test-ssre "\\d{2,3}?" () (**? 2 3 numeric)) -(test-ssre "\\S+" () (+ (~ space))) -(test-ssre "\\S{2,3}" () (** 2 3 (~ space))) -(test-ssre "\\S{2,3}?" () (**? 2 3 (~ space))) -(test-ssre ">\\s+<" () (: #\> (+ space) #\<)) -(test-ssre ">\\s{2,3}<" () (: #\> (** 2 3 space) #\<)) -(test-ssre ">\\s{2,3}?<" () (: #\> (**? 2 3 space) #\<)) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\w{2,3}" () (** 2 3 (or alnum #\_))) -(test-ssre "\\w{2,3}?" () (**? 2 3 (or alnum #\_))) -(test-ssre "\\W+" () (+ (~ (or alnum #\_)))) -(test-ssre "\\W{2,3}" () (** 2 3 (~ (or alnum #\_)))) -(test-ssre "\\W{2,3}?" () (**? 
2 3 (~ (or alnum #\_)))) -(test-ssre "^[ac]*b" () (: bos (* (or #\a #\c)) #\b)) -(test-ssre "^[^x]*b" () (: bos (* (~ #\x)) #\b)) -(test-ssre "^[^x]*b" () (: bos (* (~ #\x)) #\b)) -(test-ssre "^\\d*b" () (: bos (* numeric) #\b)) -(test-ssre "(|a)" () ($ (or (:) #\a))) -(test-ssre "abcd*" () (: #\a #\b #\c (* #\d))) -(test-ssre "abcd*" () (: #\a #\b #\c (* #\d))) -(test-ssre "abc\\d*" () (: #\a #\b #\c (* numeric))) -(test-ssre "abc[de]*" () (: #\a #\b #\c (* (or #\d #\e)))) -(test-ssre "\\bthe cat\\b" () (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow))) -(test-ssre "[\\p{Nd}]" () numeric) -(test-ssre "[\\p{Nd}+-]+" () (+ (or numeric #\+ #\-))) -(test-ssre "[\\P{Nd}]+" () (+ (~ numeric))) -(test-ssre "^[\\p{Vowel}]" () (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) -(test-ssre "^[\\p{Any}]X" () (: bos any #\X)) -(test-ssre "^[\\P{Any}]X" () (: bos (~ any) #\X)) -(test-ssre "^[\\p{Any}]?X" () (: bos (? any) #\X)) -(test-ssre "[.\\p{Lu}][.\\p{Ll}][.\\P{Lu}][.\\P{Ll}]" () (: (or #\. upper) (or #\. lower) (or #\. (~ upper)) (or #\. (~ lower)))) -(test-ssre "[\\p{L}]" () alpha) -(test-ssre "[\\P{L}]" () (~ alpha)) -(test-ssre "[\\pLu]" () (or alpha #\u)) -(test-ssre "[\\PLu]" () (or (~ alpha) #\u)) -(test-ssre "\\p{Nd}" () numeric) -(test-ssre "\\p{Nd}+" () (+ numeric)) -(test-ssre "\\P{Nd}+" () (+ (~ numeric))) -(test-ssre "^\\p{Vowel}" () (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) -(test-ssre "^\\p{Any}X" () (: bos any #\X)) -(test-ssre "^\\P{Any}X" () (: bos (~ any) #\X)) -(test-ssre "^\\p{Any}?X" () (: bos (? 
any) #\X)) -(test-ssre "\\p{L}" () alpha) -(test-ssre "\\P{L}" () (~ alpha)) -(test-ssre "\\pLu" () (: alpha #\u)) -(test-ssre "\\PLu" () (: (~ alpha) #\u)) -(test-ssre "\\b...\\B" () (: (or bow eow) nonl nonl nonl nwb)) -(test-ssre "\\b...\\B" () (: (or bow eow) nonl nonl nonl nwb)) -(test-ssre "\\b...\\B" () (: (or bow eow) nonl nonl nonl nwb)) -(test-ssre "ist" () (: #\i #\s #\t)) -(test-ssre "is+t" () (: #\i (+ #\s) #\t)) -(test-ssre "is+?t" () (: #\i (**? 1 #f #\s) #\t)) -(test-ssre "is?t" () (: #\i (? #\s) #\t)) -(test-ssre "is{2}t" () (: #\i (= 2 #\s) #\t)) -(test-ssre "^A\\s+Z" () (: bos #\A (+ space) #\Z)) -(test-ssre "AskZ" () (: #\A #\s #\k #\Z)) -(test-ssre "[AskZ]+" () (+ (or #\A #\s #\k #\Z))) -(test-ssre "[^s]+" () (+ (~ #\s))) -(test-ssre "[^s]+" () (+ (~ #\s))) -(test-ssre "[^k]+" () (+ (~ #\k))) -(test-ssre "[^k]+" () (+ (~ #\k))) -(test-ssre "[^sk]+" () (+ (~ (or #\s #\k)))) -(test-ssre "[^sk]+" () (+ (~ (or #\s #\k)))) -(test-ssre "i" () #\i) -(test-ssre "I" () #\I) -(test-ssre "[i]" () #\i) -(test-ssre "[zi]" () (or #\z #\i)) -(test-ssre "[iI]" () (or #\i #\I)) -(test-ssre "\\d+" () (+ numeric)) -(test-ssre "\\d+" () (+ numeric)) -(test-ssre ">\\s+<" () (: #\> (+ space) #\<)) -(test-ssre ">\\s+<" () (: #\> (+ space) #\<)) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\b.+?\\b" () (: (or bow eow) (**? 1 #f nonl) (or bow eow))) -(test-ssre "caf\\B.+?\\B" () (: #\c #\a #\f nwb (**? 1 #f nonl) nwb)) -(test-ssre "x{1,3}+" () (+ (** 1 3 #\x))) -(test-ssre "[a]" () #\a) -(test-ssre "[^a]" () (~ #\a)) -(test-ssre "(?<=C\n)^" () (: (look-behind (: #\C #\newline)) bos)) -(test-ssre "\\w+(?=\t)" () (: (+ (or alnum #\_)) (look-ahead #\tab))) +; options tests -;; new set notation tests +(test-ssre "(?i)A string" (w/nocase (: #\A #\space #\s #\t #\r #\i #\n #\g))) +(test-ssre "(?i)([^.]*)\\.([^:]*):[T ]+(.*)" (w/nocase (: ($ (* (~ #\.))) #\. 
($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* nonl))))) +(test-ssre "(?i)^[W-c]+$" (w/nocase (: bos (+ (char-range #\W #\c)) eos))) +(test-ssre "(?s)\\A(.)*\\z" (: bos (* ($ any)) eos)) +(test-ssre "(?i)[^a]" (w/nocase (~ #\a))) +(test-ssre "(?i:saturday|sunday)" (w/nocase (or (: #\s #\a #\t #\u #\r #\d #\a #\y) (: #\s #\u #\n #\d #\a #\y)))) +(test-ssre "(?i)(?i:a)b" (w/nocase (: #\a #\b))) +(test-ssre "(?i)((?i:a))b" (w/nocase (: ($ #\a) #\b))) +(test-ssre "(?i)(?-i:a)b" (w/nocase (: (w/case #\a) #\b))) +(test-ssre "(?i)((?-i:a))b" (w/nocase (: ($ (w/case #\a)) #\b))) +(test-ssre "(?i)(?-i:a)b" (w/nocase (: (w/case #\a) #\b))) +(test-ssre "((?-i:a))b" (: ($ #\a) #\b)) +(test-ssre "(?-i:a)b" (: #\a #\b)) +(test-ssre "((?-i:a))b" (: ($ #\a) #\b)) +(test-ssre "(?is)((?-i:a.))b" (w/nocase (: ($ (w/case (: #\a any))) #\b))) +;(test-ssre "^a(?#xxx){3}c" (: bos "a" "{3}c")) -- (?#comments) not supported +(test-ssre "(?m)^b$" (: bol #\b eol)) +(test-ssre "(?ms)^b." (: bol #\b any)) +(test-ssre "(?i)([\\w:]+::)?(\\w+)$" (w/nocase (: (? ($ (: (+ (or alnum #\_ #\:)) #\: #\:))) ($ (+ (or alnum #\_))) eos))) +(test-ssre "(?x)x y z | a b c" (or (: #\x #\y #\z) (: #\a #\b #\c))) +(test-ssre "(?i)AB(?-i:C)" (w/nocase (: #\A #\B (w/case #\C)))) +(test-ssre "(?i)reg(?:ul(?:[a@]|ae)r|ex)" (w/nocase (: #\r #\e #\g (or (: #\u #\l (or #\a #\@ (: #\a #\e)) #\r) (: #\e #\x))))) +(test-ssre "ab cd (?x: de fg)" (: #\a #\b #\space #\c #\d #\space (: #\d #\e #\f #\g))) +(test-ssre "ab cd(?x: de fg) h" (: #\a #\b #\space #\c #\d (: #\d #\e #\f #\g) #\space #\h)) +(test-ssre "(?s)^\\w+=.*(\\\\\n.*)*" (: bos (+ (or alnum #\_)) #\= (* any) (* ($ (: #\\ #\newline (* any)))))) +(test-ssre "(?i)[^a]*" (w/nocase (* (~ #\a)))) +(test-ssre "(?i)[^a]*?X" (w/nocase (: (*? (~ #\a)) #\X))) +(test-ssre "(?i)[^a]+?X" (w/nocase (: (**? 1 #f (~ #\a)) #\X))) +(test-ssre "(?i)[^a]?X" (w/nocase (: (? (~ #\a)) #\X))) +(test-ssre "(?i)[^a]??X" (w/nocase (: (?? 
(~ #\a)) #\X))) +(test-ssre "(?i)[^a]{2,3}" (w/nocase (** 2 3 (~ #\a)))) +(test-ssre "(?i)[^a]{2,3}?" (w/nocase (**? 2 3 (~ #\a)))) +(test-ssre "(?i)(?<=a{2})b" (w/nocase (: (look-behind (= 2 #\a)) #\b))) +(test-ssre "(?i)(?= 8 (* (or (~ alpha) #\a #\*)))) +(test-ssre "(?i)abc" (w/nocase (: #\a #\b #\c))) +(test-ssre "(?i)(?-i)the end" (w/nocase (w/case (: #\t #\h #\e #\space #\e #\n #\d)))) ; optimise? +(test-ssre "(?i)([\\da-f:]+)$" (w/nocase (: ($ (+ (or numeric (char-range #\a #\f) #\:))) eos))) +(test-ssre "(?i)^[\\da-f](\\.[\\da-f])*$" (w/nocase (: bos (or numeric (char-range #\a #\f)) (* ($ (: #\. (or numeric (char-range #\a #\f))))) eos))) +(test-ssre "(?is)([^.]*)\\.([^:]*):[T ]+(.*)" (w/nocase (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* any))))) +(test-ssre "(?isn)([^.]*)\\.([^:]*):[T ]+(.*)" (w/nocase (: (* (~ #\.)) #\. (* (~ #\:)) #\: (+ (or #\T #\space)) (* any)))) +(test-ssre "(?i)^[W-c]+$" (w/nocase (: bos (+ (char-range #\W #\c)) eos))) +(test-ssre "(?i)^[\x3f;-\x5F;]+$" (w/nocase (: bos (+ (char-range #\? 
#\_)) eos))) +(test-ssre "(?i)[^a]" (w/nocase (~ #\a))) +(test-ssre "(?i)[^a]+" (w/nocase (+ (~ #\a)))) +(test-ssre "(?i)[^az]" (w/nocase (~ (or #\a #\z)))) +(test-ssre "(?i)\\b(foo)\\s+(\\w+)" (w/nocase (: (or bow eow) ($ (: #\f #\o #\o)) (+ space) ($ (+ (or alnum #\_)))))) +(test-ssre "a(?i:b)c" (: #\a (w/nocase #\b) #\c)) +(test-ssre "a(?i:b)*c" (: #\a (* (w/nocase #\b)) #\c)) +(test-ssre "(?im)^(?-u:\\w\\s*\\w)$" (w/nocase (: bol (w/ascii (: (or alnum #\_) (* space) (or alnum #\_))) eol))) +(test-ssre "(?i)abc\\X*" (w/nocase (: #\a #\b #\c (* grapheme)))) +(test-ssre "(?n)((((((((((a))))))))))" #\a) +(test-ssre "(?n)((((((((?-n:(a)))))))))" ($ #\a)) +(test-ssre "(?n)((((((((?-n:(a)|(a)))))))))" (or ($ #\a) ($ #\a))) -(test-ssre "{Nd}" () numeric) -(test-ssre "{Nd|[+]|[-]}+" () (+ (or numeric #\+ #\-))) -(test-ssre "{~Nd}+" () (+ (~ numeric))) -(test-ssre "^{Vowel}" () (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) -(test-ssre "^{Any}X" () (: bos any #\X)) -(test-ssre "^{~Any}X" () (: bos (~ any) #\X)) -(test-ssre "^{Any}?X" () (: bos (? any) #\X)) -(test-ssre "{[.]|Lu}{[.]|Ll}{[.]|~Lu}{[.]|~Ll}" () (: (or #\. upper) (or #\. lower) (or #\. (~ upper)) (or #\. (~ lower)))) -(test-ssre "{L}" () alpha) -(test-ssre "{~L}" () (~ alpha)) -(test-ssre "{L}u" () (: alpha #\u)) -(test-ssre "{~L}u" () (: (~ alpha) #\u)) -(test-ssre "{L-Vowel}u" () (: (- alpha (or #\A #\E #\I #\O #\U #\Y #\W)) #\u)) -(test-ssre "{Nd}" () numeric) -(test-ssre "{Nd}+" () (+ numeric)) -(test-ssre "{~Nd}+" () (+ (~ numeric))) -(test-ssre "^{Vowel}" () (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) -(test-ssre "^{Any}X" () (: bos any #\X)) -(test-ssre "^{~Any}X" () (: bos (~ any) #\X)) -(test-ssre "^{Any}?X" () (: bos (? 
any) #\X)) -(test-ssre "{u}{l|d}*" () (: upper (* (or lower numeric)))) -(test-ssre "{~d}{an|[']}*" () (: (~ numeric) (* (or alnum #\')))) -(test-ssre "{<}{u&~Vowel|d}{!b}{an-d}*{>}" () (: bow (or (- upper (or #\A #\E #\I #\O #\U #\Y #\W)) numeric) nwb (* (- alnum numeric)) eow)) -(test-ssre "{}\\X*" () (: grapheme (* grapheme))) +; restore default definitions for sre tests +(ssre-definitions *ssre-definitions*) -;; options tests +; NOTE: reverse translations on the right are not the only correct ones; there can be equivalent translations, which are also correct -(test-ssre "A string" (i) (w/nocase (: #\A #\space #\s #\t #\r #\i #\n #\g))) -(test-ssre "([^.]*)\\.([^:]*):[T ]+(.*)" (i) (w/nocase (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* nonl))))) -(test-ssre "^[W-c]+$" (i) (w/nocase (: bos (+ (char-range #\W #\c)) eos))) -(test-ssre "\\A(.)*\\z" (s) (: bos (* ($ any)) eos)) -(test-ssre "[^a]" (i) (w/nocase (~ #\a))) -(test-ssre "(?i:saturday|sunday)" () (w/nocase (or (: #\s #\a #\t #\u #\r #\d #\a #\y) (: #\s #\u #\n #\d #\a #\y)))) -(test-ssre "(?i:a)b" (i) (w/nocase (: #\a #\b))) -(test-ssre "((?i:a))b" (i) (w/nocase (: ($ #\a) #\b))) -(test-ssre "(?-i:a)b" (i) (w/nocase (: (w/case #\a) #\b))) -(test-ssre "((?-i:a))b" (i) (w/nocase (: ($ (w/case #\a)) #\b))) -(test-ssre "(?-i:a)b" (i) (w/nocase (: (w/case #\a) #\b))) -(test-ssre "((?-i:a))b" () (: ($ #\a) #\b)) -(test-ssre "(?-i:a)b" () (: #\a #\b)) -(test-ssre "((?-i:a))b" () (: ($ #\a) #\b)) -(test-ssre "((?-i:a.))b" (i s) (w/nocase (: ($ (w/case (: #\a any))) #\b))) -;(test-ssre "^a(?#xxx){3}c" () (: bos "a" "{3}c")) -- (?#comments) not supported -(test-ssre "(?m)^b$" () (: bol #\b eol)) -(test-ssre "(?ms)^b." () (: bol #\b any)) -(test-ssre "([\\w:]+::)?(\\w+)$" (i) (w/nocase (: (? 
($ (: (+ (or alnum #\_ #\:)) #\: #\:))) ($ (+ (or alnum #\_))) eos))) -(test-ssre "(?x)x y z | a b c" () (or (: #\x #\y #\z) (: #\a #\b #\c))) -(test-ssre "(?i)AB(?-i:C)" () (w/nocase (: #\A #\B (w/case #\C)))) -(test-ssre "(?i)reg(?:ul(?:[a@]|ae)r|ex)" () (w/nocase (: #\r #\e #\g (or (: #\u #\l (or #\a #\@ (: #\a #\e)) #\r) (: #\e #\x))))) -(test-ssre "ab cd (?x: de fg)" () (: #\a #\b #\space #\c #\d #\space (: #\d #\e #\f #\g))) -(test-ssre "ab cd(?x: de fg) h" () (: #\a #\b #\space #\c #\d (: #\d #\e #\f #\g) #\space #\h)) -(test-ssre "^\\w+=.*(\\\\\n.*)*" (s) (: bos (+ (or alnum #\_)) #\= (* any) (* ($ (: #\\ #\newline (* any)))))) -(test-ssre "[^a]*" (i) (w/nocase (* (~ #\a)))) -(test-ssre "[^a]*?X" (i) (w/nocase (: (*? (~ #\a)) #\X))) -(test-ssre "[^a]+?X" (i) (w/nocase (: (**? 1 #f (~ #\a)) #\X))) -(test-ssre "[^a]?X" (i) (w/nocase (: (? (~ #\a)) #\X))) -(test-ssre "[^a]??X" (i) (w/nocase (: (?? (~ #\a)) #\X))) -(test-ssre "[^a]{2,3}" (i) (w/nocase (** 2 3 (~ #\a)))) -(test-ssre "[^a]{2,3}?" (i) (w/nocase (**? 2 3 (~ #\a)))) -(test-ssre "(?<=a{2})b" (i) (w/nocase (: (look-behind (= 2 #\a)) #\b))) -(test-ssre "(?= 8 (* (or (~ alpha) #\a #\*)))) -(test-ssre "(?i)abc" (i) (w/nocase (: #\a #\b #\c))) -(test-ssre "(?-i)the end" (i) (w/nocase (w/case (: #\t #\h #\e #\space #\e #\n #\d)))) ; optimise? -(test-ssre "([\\da-f:]+)$" (i) (w/nocase (: ($ (+ (or numeric (char-range #\a #\f) #\:))) eos))) -(test-ssre "^[\\da-f](\\.[\\da-f])*$" (i) (w/nocase (: bos (or numeric (char-range #\a #\f)) (* ($ (: #\. (or numeric (char-range #\a #\f))))) eos))) -(test-ssre "([^.]*)\\.([^:]*):[T ]+(.*)" (i s) (w/nocase (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* any))))) -(test-ssre "([^.]*)\\.([^:]*):[T ]+(.*)" (i s n) (w/nocase (: (* (~ #\.)) #\. (* (~ #\:)) #\: (+ (or #\T #\space)) (* any)))) -(test-ssre "^[W-c]+$" (i) (w/nocase (: bos (+ (char-range #\W #\c)) eos))) -(test-ssre "^[\x3f;-\x5F;]+$" (i) (w/nocase (: bos (+ (char-range #\? 
#\_)) eos))) -(test-ssre "[^a]" (i) (w/nocase (~ #\a))) -(test-ssre "[^a]+" (i) (w/nocase (+ (~ #\a)))) -(test-ssre "[^az]" (i) (w/nocase (~ (or #\a #\z)))) -(test-ssre "\\b(foo)\\s+(\\w+)" (i) (w/nocase (: (or bow eow) ($ (: #\f #\o #\o)) (+ space) ($ (+ (or alnum #\_)))))) -(test-ssre "a(?i:b)c" () (: #\a (w/nocase #\b) #\c)) -(test-ssre "a(?i:b)*c" () (: #\a (* (w/nocase #\b)) #\c)) -(test-ssre "^(?-u:\\w\\s*\\w)$" (i m) (w/nocase (: bol (w/ascii (: (or alnum #\_) (* space) (or alnum #\_))) eol))) -(test-ssre "abc\\X*" (i) (w/nocase (: #\a #\b #\c (* grapheme)))) -(test-ssre "((((((((((a))))))))))" (n) #\a) -(test-ssre "((((((((?-n:(a)))))))))" (n) ($ #\a)) +(test-sre (: #\t #\h #\e #\space #\q #\u #\i #\c #\k #\space #\b #\r #\o #\w #\n #\space #\f #\o #\x) "the quick brown fox") +(test-sre (: (* #\a) #\a #\b (? #\c) #\x #\y (+ #\z) #\p #\q (= 3 #\r) #\a (>= 2 #\b) #\x (** 4 5 #\y) #\p (** 0 6 #\q) #\A (>= 0 #\B) #\z #\z) "a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB*zz") +(test-sre (: bos (** 1 2 ($ (: #\a #\b #\c))) #\z #\z) "^(abc){1,2}zz") +(test-sre (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c) "^(b+?|a){1,2}?c") +(test-sre (: bos (** 1 2 ($ (or (+ #\b) #\a))) #\c) "^(b+|a){1,2}c") +(test-sre (: bos (**? 1 2 ($ (or (: #\b #\a) (* #\b)))) #\b #\c) "^(ba|b*){1,2}?bc") +(test-sre (: bos (or #\a #\b #\] #\c #\d #\e)) "^[ab\\]cde]") +(test-sre (: bos (or #\] #\c #\d #\e)) "^[\\]cde]") +(test-sre (: bos (~ (or #\a #\b #\] #\c #\d #\e))) "^[^ab\\]cde]") +(test-sre (: bos (~ (or #\] #\c #\d #\e))) "^[^\\]cde]") +(test-sre (: bos #\@) "^@") +(test-sre (: bos (+ (char-range #\0 #\9)) eos) "^[0-9]+$") +(test-sre (: bos (* nonl) #\n #\t #\e #\r) "^.*nter") +(test-sre (: bos #\x #\x #\x (+ (char-range #\0 #\9)) eos) "^xxx[0-9]+$") +(test-sre (: bos (+ nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos) "^.+[0-9][0-9][0-9]$") +(test-sre (: bos (**? 
1 #f nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos) "^.+?[0-9][0-9][0-9]$") +(test-sre (: bos ($ (+ (~ #\!))) #\! ($ (+ nonl)) #\= #\a #\p #\q #\u #\x #\z #\. #\i #\x #\r #\. #\z #\z #\z #\. #\a #\c #\. #\u #\k eos) "^([^!]+)!(.+)=apquxz\\.ixr\\.zzz\\.ac\\.uk$") +(test-sre #\: ":") +(test-sre (: ($ (+ (or numeric (char-range #\a #\f) #\:))) eos) "([\\da-f:]+)$") +(test-sre (: bos (* nonl) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) eos) "^.*\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$") +(test-sre (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos) "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$") +(test-sre (: bos (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-)) (* ($ (: #\. (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-))))) #\. eos) "^[a-zA-Z\\d][a-zA-Z\\d\\-]*(\\.[a-zA-Z\\d][a-zA-Z\\d\\-]*)*\\.$") +(test-sre (: bos #\* #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric))))) (* ($ (: #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric)))))))) eos) "^\\*\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?(\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?)*$") +(test-sre (: bos (look-ahead (: #\a #\b ($ (: #\d #\e)))) ($ (: #\a #\b #\d)) ($ #\e)) "^(?=ab(de))(abd)(e)") +(test-sre (: bos (neg-look-ahead (or (: ($ (: #\a #\b)) #\d #\e) #\x)) ($ (: #\a #\b #\d)) ($ #\f)) "^(?!(ab)de|x)(abd)(f)") +(test-sre (: bos (look-ahead ($ (: #\a #\b ($ (: #\c #\d))))) ($ (: #\a #\b))) "^(?=(ab(cd)))(ab)") +(test-sre (: bos (or numeric (char-range #\a #\f)) (* ($ (: #\. (or numeric (char-range #\a #\f))))) eos) "^[\\da-f](\\.[\\da-f])*$") +(test-sre (: bos #\" (* nonl) #\" (* space) (? 
($ (: #\; (* nonl)))) eos) "^\".*\"\\s*(;.*)?$") +(test-sre (: bos eos) "^$") +(test-sre (: bos #\a #\space #\b (or #\c #\space) #\d eos) "^a b[c ]d$") +(test-sre (: bos ($ (: #\a ($ (: #\b ($ #\c))))) ($ (: #\d ($ (: #\e ($ #\f))))) ($ (: #\h ($ (: #\i ($ #\j))))) ($ (: #\k ($ (: #\l ($ #\m))))) eos) "^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$") +(test-sre (: bos (: #\a ($ (: #\b ($ #\c)))) (: #\d ($ (: #\e ($ #\f)))) (: #\h ($ (: #\i ($ #\j)))) (: #\k ($ (: #\l ($ #\m)))) eos) "^a(b(c))d(e(f))h(i(j))k(l(m))$") +(test-sre (: bos (or alnum #\_) (~ (or alnum #\_)) space (~ space) numeric (~ numeric) #\]) "^\\w\\W\\s\\S\\d\\D\\]") +(test-sre (: bos (+ (or #\. #\^ #\$ #\| #\( #\) #\* #\+ #\? #\{ #\, #\}))) "^[.\\^$|()*+?{,}]+") +(test-sre (: bos (* #\a) (or alnum #\_)) "^a*\\w") +(test-sre (: bos (*? #\a) (or alnum #\_)) "^a*?\\w") +(test-sre (: bos (+ #\a) (or alnum #\_)) "^a+\\w") +(test-sre (: bos (**? 1 #f #\a) (or alnum #\_)) "^a+?\\w") +(test-sre (: bos (= 8 numeric) (>= 2 (or alnum #\_))) "^\\d{8}\\w{2,}") +(test-sre (: bos (** 4 5 (or #\a #\e #\i #\o #\u numeric)) eos) "^[aeiou\\d]{4,5}$") +(test-sre (: bos (**? 4 5 (or #\a #\e #\i #\o #\u numeric))) "^[aeiou\\d]{4,5}?") +(test-sre (: bos #\F #\r #\o #\m (+ #\space) ($ (+ (~ #\space))) (+ #\space) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (+ #\space) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (+ #\space) (? 
(char-range #\0 #\9)) (char-range #\0 #\9) (+ #\space) (char-range #\0 #\9) (char-range #\0 #\9) #\: (char-range #\0 #\9) (char-range #\0 #\9)) "^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]") +(test-sre (: bos #\F #\r #\o #\m (+ space) (+ (~ space)) (+ space) (= 2 ($ (: (= 3 (or (char-range #\a #\z) (char-range #\A #\Z))) (+ space)))) (** 1 2 numeric) (+ space) numeric numeric #\: numeric numeric) "^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d") +(test-sre (: bos #\1 #\2 nonl #\3 #\4) "^12.34") +(test-sre (: #\f #\o #\o (neg-look-ahead (: #\b #\a #\r)) ($ (* nonl))) "foo(?!bar)(.*)") +(test-sre (: (or (: (neg-look-ahead (: #\f #\o #\o)) nonl nonl nonl) (: bos (** 0 2 nonl))) #\b #\a #\r ($ (* nonl))) "(?:(?!foo)...|^.{0,2})bar(.*)") +(test-sre (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3))) "^(\\D*)(?=\\d)(?!123)") +(test-sre (: (neg-look-ahead bos) #\a #\b #\c) "{!= 8 numeric) #\@ (+ nonl) (~ #\k) eos) "^\\d{8,}@.+[^k]$") +(test-sre (~ #\a) "[^a]") +(test-sre (~ (or #\a #\z)) "[^az]") +(test-sre (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 6 (~ #\*)) #\L #\L) "P[^*]TAIRE[^*]{1,6}?LL") +(test-sre (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 #f (~ #\*)) #\L #\L) "P[^*]TAIRE[^*]+?LL") +(test-sre (: ($ (: #\. numeric numeric (? (char-range #\1 #\9)))) (+ numeric)) "(\\.\\d\\d[1-9]?)\\d+") +(test-sre ($ (: #\. numeric numeric ($ (or (look-ahead #\0) (: numeric (look-ahead numeric)))))) "(\\.\\d\\d((?=0)|\\d(?=\\d)))") +(test-sre (: (or bow eow) ($ (: #\f #\o #\o)) (+ space) ($ (+ (or alnum #\_)))) "\\b(foo)\\s+(\\w+)") +(test-sre (: #\f #\o #\o ($ (* nonl)) #\b #\a #\r) "foo(.*)bar") +(test-sre (: #\f #\o #\o ($ (*? nonl)) #\b #\a #\r) "foo(.*?)bar") +(test-sre (: ($ (* nonl)) ($ (* numeric))) "(.*)(\\d*)") +(test-sre (: ($ (* nonl)) ($ (+ numeric))) "(.*)(\\d+)") +(test-sre (: ($ (*? nonl)) ($ (* numeric))) "(.*?)(\\d*)") +(test-sre (: ($ (*? 
nonl)) ($ (+ numeric))) "(.*?)(\\d+)") +(test-sre (: ($ (* nonl)) ($ (+ numeric)) eos) "(.*)(\\d+)$") +(test-sre (: ($ (*? nonl)) ($ (+ numeric)) eos) "(.*?)(\\d+)$") +(test-sre (: ($ (* nonl)) (or bow eow) ($ (+ numeric)) eos) "(.*)\\b(\\d+)$") +(test-sre (: ($ (: (* nonl) (~ numeric))) ($ (+ numeric)) eos) "(.*\\D)(\\d+)$") +(test-sre (: bos (* (~ numeric)) (neg-look-ahead (: #\1 #\2 #\3))) "^\\D*(?!123)") +(test-sre (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3))) "^(\\D*)(?=\\d)(?!123)") +(test-sre (: bos (or #\W #\-) #\4 #\6 #\]) "^[W\\-]46\\]") +(test-sre (: bos (or (char-range #\W #\]) #\4 #\6)) "^[W-\\]46]") +(test-sre (: #\w #\o #\r #\d #\space (** 0 10 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d) "word (?:[a-zA-Z0-9]+ ){0,10}otherword") +(test-sre (: #\w #\o #\r #\d #\space (** 0 300 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d) "word (?:[a-zA-Z0-9]+ ){0,300}otherword") +(test-sre (: bos (= 0 ($ #\a))) "^(a){0}") +(test-sre (: bos (** 0 1 ($ #\a))) "^(a)?") +(test-sre (: bos (** 0 2 ($ #\a))) "^(a){0,2}") +(test-sre (: bos (** 0 3 ($ #\a))) "^(a){0,3}") +(test-sre (: bos (>= 0 ($ #\a))) "^(a)*") +(test-sre (: bos (= 1 ($ #\a))) "^(a)") +(test-sre (: bos (** 1 2 ($ #\a))) "^(a){1,2}") +(test-sre (: bos (** 1 3 ($ #\a))) "^(a){1,3}") +(test-sre (: bos (>= 1 ($ #\a))) "^(a)+") +(test-sre (: (* nonl) #\. #\g #\i #\f) ".*\\.gif") +(test-sre (: (>= 0 nonl) #\. 
#\g #\i #\f) ".*\\.gif") +(test-sre (: (* nonl) eos) ".*$") +(test-sre ($ (or (: (* nonl) #\X) (: bos #\B))) "(.*X|^B)") +(test-sre (: bos (* nonl) #\B) "^.*B") +(test-sre (: bos (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9)) "^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]") +(test-sre (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric) "^\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d") +(test-sre (: bos (= 12 (or #\a #\b #\c))) "^[abc]{12}") +(test-sre (: bos (= 12 (char-range #\a #\c))) "^[a-c]{12}") +(test-sre (: bos (= 12 ($ (or #\a #\b #\c)))) "^([abc]){12}") +(test-sre (: bos (or #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)) "^[abcdefghijklmnopqrstuvwxy0123456789]") +(test-sre (: #\a #\b #\c #\d (= 0 #\e)) "abcde{0}") +(test-sre (: #\a #\b (= 0 (or #\c #\d)) #\e) "ab[cd]{0}e") +(test-sre (: #\a #\b (= 0 ($ #\c)) #\d) "ab(c){0}d") +(test-sre (: #\a ($ (* #\b))) "a(b*)") +(test-sre (: #\a #\b (= 0 numeric) #\e) "ab\\d{0}e") +(test-sre (: #\" (* ($ (or (+ (~ (or #\\ #\"))) (: #\\ nonl)))) #\") "\"([^\\\\\"]+|\\\\.)*\"") +(test-sre (*? nonl) ".*?") +(test-sre (or bow eow) "\\b") +(test-sre (or bow eow) "\\b") +(test-sre (: #\a (~ #\a) #\b) "a[^a]b") +(test-sre (: #\a nonl #\b) "a.b") +(test-sre (: #\a (~ #\a) #\b) "a[^a]b") +(test-sre (: #\a nonl #\b) "a.b") +(test-sre (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c) "^(b+?|a){1,2}?c") +(test-sre (: bos (**? 
1 2 ($ (or (+ #\b) #\a))) #\c) "^(b+|a){1,2}?c") +(test-sre (: (neg-look-ahead bos) #\x) "{!= 0 #\b) #\b #\c) "ab*bc") +(test-sre (: #\a (+ #\b) #\b #\c) "ab+bc") +(test-sre (: #\a (>= 1 #\b) #\b #\c) "ab+bc") +(test-sre (: #\a (+ #\b) #\b #\c) "ab+bc") +(test-sre (: #\a (>= 1 #\b) #\b #\c) "ab+bc") +(test-sre (: #\a (** 1 3 #\b) #\b #\c) "ab{1,3}bc") +(test-sre (: #\a (** 3 4 #\b) #\b #\c) "ab{3,4}bc") +(test-sre (: #\a (** 4 5 #\b) #\b #\c) "ab{4,5}bc") +(test-sre (: #\a (? #\b) #\b #\c) "ab?bc") +(test-sre (: #\a (** 0 1 #\b) #\b #\c) "ab?bc") +(test-sre (: #\a (? #\b) #\b #\c) "ab?bc") +(test-sre (: #\a (? #\b) #\c) "ab?c") +(test-sre (: #\a (** 0 1 #\b) #\c) "ab?c") +(test-sre (: bos #\a #\b #\c eos) "^abc$") +(test-sre (: bos #\a #\b #\c) "^abc") +(test-sre (: bos #\a #\b #\c eos) "^abc$") +(test-sre (: #\a #\b #\c eos) "abc$") +(test-sre bos "^") +(test-sre eos "$") +(test-sre (: #\a nonl #\c) "a.c") +(test-sre (: #\a (* nonl) #\c) "a.*c") +(test-sre (: #\a (or #\b #\c) #\d) "a[bc]d") +(test-sre (: #\a (char-range #\b #\d) #\e) "a[b-d]e") +(test-sre (: #\a (char-range #\b #\d)) "a[b-d]") +(test-sre (: #\a (or #\- #\b)) "a[\\-b]") +(test-sre (: #\a (or #\b #\-)) "a[b\\-]") +(test-sre (: #\a #\]) "a\\]") +(test-sre (: #\a #\] #\b) "a\\]b") +(test-sre (: #\a (~ (or #\b #\c)) #\d) "a[^bc]d") +(test-sre (: #\a (~ (or #\- #\b)) #\c) "a[^\\-b]c") +(test-sre (: #\a (~ (or #\] #\b)) #\c) "a[^\\]b]c") +(test-sre (: (or bow eow) #\a (or bow eow)) "\\ba\\b") +(test-sre (: (or bow eow) #\y (or bow eow)) "\\by\\b") +(test-sre (: nwb #\a nwb) "\\Ba\\B") +(test-sre (: nwb #\y (or bow eow)) "\\By\\b") +(test-sre (: (or bow eow) #\y nwb) "\\by\\B") +(test-sre (: nwb #\y nwb) "\\By\\B") +(test-sre (or alnum #\_) "\\w") +(test-sre (~ (or alnum #\_)) "\\W") +(test-sre (: #\a space #\b) "a\\sb") +(test-sre (: #\a (~ space) #\b) "a\\Sb") +(test-sre numeric "\\d") +(test-sre (~ numeric) "\\D") +(test-sre (or (: #\a #\b) (: #\c #\d)) "ab|cd") +(test-sre (: ($ (:)) #\e #\f) "()ef") 
+(test-sre (: eos #\b) "$b") +(test-sre (: #\a #\( #\b) "a\\(b") +(test-sre (: #\a (* #\() #\b) "a\\(*b") +(test-sre (: #\a #\\ #\b) "a\\\\b") +(test-sre ($ ($ #\a)) "((a))") +(test-sre (: ($ #\a) #\b ($ #\c)) "(a)b(c)") +(test-sre (: (+ #\a) (+ #\b) #\c) "a+b+c") +(test-sre (: (>= 1 #\a) (>= 1 #\b) #\c) "a+b+c") +(test-sre (: #\a (**? 1 #f nonl) #\c) "a.+?c") +(test-sre (* ($ (or (+ #\a) #\b))) "(a+|b)*") +(test-sre (>= 0 ($ (or (+ #\a) #\b))) "(a+|b)*") +(test-sre (+ ($ (or (+ #\a) #\b))) "(a+|b)+") +(test-sre (>= 1 ($ (or (+ #\a) #\b))) "(a+|b)+") +(test-sre (? ($ (or (+ #\a) #\b))) "(a+|b)?") +(test-sre (** 0 1 ($ (or (+ #\a) #\b))) "(a+|b)?") +(test-sre (* (~ (or #\a #\b))) "[^ab]*") +(test-sre (: #\a #\b #\c) "abc") +(test-sre (* #\a) "a*") +(test-sre (: (* ($ (or #\a #\b #\c))) #\d) "([abc])*d") +(test-sre (: (* ($ (or #\a #\b #\c))) #\b #\c #\d) "([abc])*bcd") +(test-sre (or #\a #\b #\c #\d #\e) "[abcde]") +(test-sre (: ($ (or #\a #\b #\c #\d #\e)) #\f) "([abcde])f") +(test-sre (: #\a #\b #\c (* #\d) #\e #\f #\g) "abcd*efg") +(test-sre (: #\a (* #\b)) "ab*") +(test-sre (: ($ (or (: #\a #\b) (: #\c #\d))) #\e) "(ab|cd)e") +(test-sre (: (or #\a #\b #\h #\g #\e #\f #\d #\c) #\i #\j) "[abhgefdc]ij") +(test-sre (: bos ($ (or (: #\a #\b) (: #\c #\d))) #\e) "^(ab|cd)e") +(test-sre (: ($ (or (: #\a #\b #\c) (:))) #\e #\f) "(abc|)ef") +(test-sre (: ($ (or #\a #\b)) (* #\c) #\d) "([ab])c*d") +(test-sre (: ($ (or (: #\a #\b) (: #\a (* #\b)))) #\b #\c) "(ab|ab*)bc") +(test-sre (: #\a ($ (* (or #\b #\c))) (* #\c)) "a([bc]*)c*") +(test-sre (: #\a ($ (* (or #\b #\c))) ($ (: (* #\c) #\d))) "a([bc]*)(c*d)") +(test-sre (: #\a ($ (+ (or #\b #\c))) ($ (: (* #\c) #\d))) "a([bc]+)(c*d)") +(test-sre (: #\a ($ (* (or #\b #\c))) ($ (: (+ #\c) #\d))) "a([bc]*)(c+d)") +(test-sre (: #\a (* (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e) "a[bcd]*dcdcde") +(test-sre (: #\a (+ (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e) "a[bcd]+dcdcde") +(test-sre (: ($ (or (: #\a #\b) #\a)) (* #\b) #\c) 
"(ab|a)b*c") +(test-sre (: ($ (: ($ #\a) ($ #\b) #\c)) ($ #\d)) "((a)(b)c)(d)") +(test-sre (: (or (char-range #\a #\z) (char-range #\A #\Z) #\_) (* (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9) #\_))) "[a-zA-Z_][a-zA-Z0-9_]*") +(test-sre (or (: bos #\a ($ (or (: #\b (+ #\c)) (: #\b (or #\e #\h)))) #\g) (: nonl #\h eos)) "^a(bc+|b[eh])g|.h$") +(test-sre ($ (or (: #\b (+ #\c) #\d eos) (: #\e (* #\f) #\g nonl) (: (? #\h) #\i ($ (or #\j #\k))))) "(bc+d$|ef*g.|h?i([jk]))") +(test-sre ($ ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a)))))))))) "((((((((((a))))))))))") +(test-sre ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a))))))))) "(((((((((a)))))))))") +(test-sre (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s #\space #\o #\f #\space #\t #\e #\x #\t) "multiple words of text") +(test-sre (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s) "multiple words") +(test-sre (: ($ (* nonl)) #\c ($ (* nonl))) "(.*)c(.*)") +(test-sre (: #\( ($ (* nonl)) #\, #\space ($ (* nonl)) #\)) "\\((.*), (.*)\\)") +(test-sre #\k "k") +(test-sre (: #\a #\b #\c #\d) "abcd") +(test-sre (: #\a ($ (: #\b #\c)) #\d) "a(bc)d") +(test-sre (: #\a (? #\-) #\c) "a-?c") +(test-sre (: #\a (neg-look-ahead #\b) nonl) "a(?!b).") +(test-sre (: #\a (look-ahead #\d) nonl) "a(?=d).") +(test-sre (: #\a (look-ahead (or #\c #\d)) nonl) "a(?=[cd]).") +(test-sre (: #\a (or #\b #\c #\d) ($ nonl)) "a[bcd](.)") +(test-sre (: #\a (* (or #\b #\c #\d)) ($ nonl)) "a[bcd]*(.)") +(test-sre (: #\a (**? 1 #f (or #\b #\c #\d)) ($ nonl)) "a[bcd]+?(.)") +(test-sre (: #\a (+ (or #\b #\c #\d)) ($ nonl)) "a[bcd]+(.)") +(test-sre (: #\a (= 2 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{2}(.)") +(test-sre (: #\a (** 4 5 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{4,5}(.)") +(test-sre (: #\a (**? 4 5 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{4,5}?(.)") +(test-sre (: #\a (** 6 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{6,7}(.)") +(test-sre (: #\a (**? 
6 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{6,7}?(.)") +(test-sre (: #\a (** 5 6 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,6}(.)") +(test-sre (: #\a (**? 5 6 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,6}?(.)") +(test-sre (: #\a (** 5 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,7}(.)") +(test-sre (: #\a (**? 5 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,7}?(.)") +(test-sre (: #\a (**? 1 #f (or #\b (**? 1 2 ($ (or #\c #\e))) #\d)) ($ nonl)) "a(?:b|([ce]){1,2}?|d)+?(.)") +(test-sre (: bos (? ($ (+ nonl))) #\B) "^(.+)?B") +(test-sre (or (: bos ($ (~ (char-range #\a #\z)))) (: ($ #\^) eos)) "^([^a-z])|(\\^)$") +(test-sre (: bos (or #\< #\>) #\&) "^[<>]&") +(test-sre (: (look-behind #\a) #\b) "(?<=a)b") +(test-sre (: (neg-look-behind #\c) #\b) "(? (+ #\a))) #\a #\b) "(>a+)ab") +(test-sre (: #\b eos) "b$") +(test-sre (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)))) #\f #\o #\o) "(?<=\\d{3}(?!999))foo") +(test-sre (: (look-behind (: (neg-look-ahead (: nonl nonl nonl #\9 #\9 #\9)) (= 3 numeric))) #\f #\o #\o) "(?<=(?!...999)\\d{3})foo") +(test-sre (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)) nonl nonl nonl)) #\f #\o #\o) "(?<=\\d{3}(?!999)...)foo") +(test-sre (: (look-behind (: (= 3 numeric) nonl nonl nonl)) (neg-look-behind (: #\9 #\9 #\9)) #\f #\o #\o) "(?<=\\d{3}...)(?= 0 #\b) (or alnum #\_)) "a*b*\\w") +(test-sre (: (* #\a) (* numeric) (or alnum #\_)) "a*\\d*\\w") +(test-sre (: (* #\a) (* #\b) (or alnum #\_)) "a*b*\\w") +(test-sre (: eos (neg-look-behind #\newline)) "$(?= 2 ($ (** 2 3 #\a)))) #\a) "^(?:(a{2,3}){2,})+a") +(test-sre (look-ahead #\C) "(?=C)") +(test-sre (: (or (: #\a (-> quote (or (: #\space (-> apostrophe #\')) (-> realquote #\"))) #\space) (: #\b (-> quote (or (: #\space (-> apostrophe #\')) (-> realquote #\"))) #\space)) #\space ($ (or (: (backref quote) (+ (char-range #\a #\z))) (+ (char-range #\0 #\9))))) "(?:a(? (?')|(?\")) |b(? 
(?')|(?\")) ) (\\k[a-z]+|[0-9]+)") +(test-sre (: bos (+ (>= 2 ($ #\a))) ($ (or alnum #\_))) "^(?:(a){2,})+(\\w)") +(test-sre (: bos (+ (>= 2 #\a)) ($ (or alnum #\_))) "^(?:a{2,})+(\\w)") +(test-sre (: bos (*? nonl) ($ (or #\a (: #\b #\c)))) "^.*?(a|bc)") +(test-sre (: bos (*? nonl) (or #\a (: #\b #\c) #\d)) "^.*?(?:a|bc|d)") +(test-sre (: (*? nonl) #\a (look-behind (: #\b #\a))) ".*?a(?<=ba)") +(test-sre (or (: #\a (look-ahead (: #\b #\c)) nonl) (: #\a #\b #\d)) "a(?=bc).|abd") +(test-sre (: bos (*? nonl) (or #\a (: #\b #\c))) "^.*?(?:a|bc)") +(test-sre (: bos (* numeric) (= 4 (or alnum #\_))) "^\\d*\\w{4}") +(test-sre (: bos (* (~ #\b)) (= 4 (or alnum #\_))) "^[^b]*\\w{4}") +(test-sre (: bos (* #\a) (= 4 (or alnum #\_))) "^a*\\w{4}") +(test-sre (: (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n)) "(?:(?foo)|(?bar))\\k") +(test-sre (: (-> n #\A) (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n)) "(?A)(?:(?foo)|(?bar))\\k") +(test-sre (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos) "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$") +(test-sre (: (or #\x (or (+ ($ (or (: #\x #\x) (: #\y #\y)))) #\x #\x #\x #\x #\x) #\a #\a #\a) #\b #\c) "(?:x|(xx|yy)+|[xxxxxaaa])bc") +(test-sre (: space #\a #\b #\c) "\\sabc") +(test-sre (: (* #\Z) (= 216 ($ (or (:) (* #\d))))) "Z*(|d*){216}") +(test-sre (: (look-behind (: #\a (= 0 ($ #\B)) #\c)) #\X) "(?<=a(B){0}c)X") +(test-sre (: (+ #\a) (or (:) #\b) #\a) "a+(?:|b)a") +(test-sre (: (? #\X) (= 3335 ($ (or #\R (:) (:))))) "X?(R||){3335}") +(test-sre (or (: (neg-look-ahead ($ #\b)) #\c) #\b) "(?!(b))c|b") +(test-sre (or (: (look-ahead ($ #\b)) #\b) #\c) "(?=(b))b|c") +(test-sre (: #\< (or #\a #\space #\b) #\>) "<[a b]>") +(test-sre (+ (or numeric #\-)) "[\\d\\-]+") +(test-sre (look-behind (? 
(look-ahead nonl))) "(?<=(?=.)?)") +(test-sre (look-behind (** 4 5 (look-ahead nonl))) "(?<=(?=.){4,5})") +(test-sre (look-behind (: (** 4 5 (look-ahead nonl)) #\x)) "(?<=(?=.){4,5}x)") +(test-sre (: #\space #\space #\space (* (-> word (: #\space (+ (or alnum #\_)) #\space))) #\space #\space #\space #\space #\. #\space #\space #\space) " (? \\w+ )* \\. ") +(test-sre (look-behind (look-ahead (: nonl (look-behind #\x)))) "(?<=(?=.(?<=x)))") +(test-sre (: (look-behind (look-ahead (look-behind #\a))) #\b) "(?<=(?=(?<=a)))b") +(test-sre (: (look-behind (: #\a (? #\b) #\c)) nonl nonl nonl) "(?<=ab?c)...") +(test-sre (: (look-behind (or (: #\P #\Q #\R) (: #\a (? #\b) #\c))) nonl nonl nonl) "(?<=PQR|ab?c)...") +(test-sre (: (look-behind (or (: #\a (? #\b) #\c) (: #\P #\Q #\R))) nonl nonl nonl) "(?<=ab?c|PQR)...") +(test-sre (: (look-behind (or (: #\P #\Q) (: #\a (? #\b) #\c))) nonl nonl nonl) "(?<=PQ|ab?c)...") +(test-sre (: (look-behind (or (: #\a (? #\b) #\c) (: #\P #\Q))) nonl nonl nonl) "(?<=ab?c|PQ)...") +(test-sre (: (look-behind (: #\a ($ (or (: (? #\b) #\c) (: (? #\d) (? #\e) #\e))) #\f)) #\X nonl) "(?<=a(b?c|d?e?e)f)X.") +(test-sre (: (neg-look-behind (: #\a ($ (or (: (? #\b) #\c) (: (? #\d) (? #\e) #\e))) #\f)) #\X nonl) "(?= 5 (char-range #\a #\z)) #\b) #\x) "[a-z]{5,}b|x") +(test-sre (or (: (**? 
1 6 (char-range #\a #\z)) #\s) #\x) "[a-z]{1,6}?s|x") +(test-sre #\@ "@") +(test-sre (: #\@ #\@ #\@ #\x #\x #\x) "@@@xxx") +(test-sre (: (look-behind #\x) #\b #\a #\d #\u #\t #\f) "(?<=x)badutf") +(test-sre (: (look-behind (: #\x #\x)) #\b #\a #\d #\u #\t #\f) "(?<=xx)badutf") +(test-sre (: (look-behind (: #\x #\x #\x #\x)) #\b #\a #\d #\u #\t #\f) "(?<=xxxx)badutf") +(test-sre #\X "X") +(test-sre (+ #\a) "a+") +(test-sre #\A "A") +(test-sre #\x "x") +(test-sre (: #\a #\b #\c) "abc") +(test-sre #\X "X") +(test-sre (: (look-behind nonl) #\X) "(?<=.)X") +(test-sre (+ #\a) "a+") +(test-sre #\a "a") +(test-sre nonl ".") +(test-sre #\s "s") +(test-sre (~ #\s) "[^s]") +(test-sre (: #\a (*? nonl) #\a) "a.*?a") +(test-sre (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z))) "(?<=pqr)abc(?=xyz)") +(test-sre (: #\a (or bow eow)) "a\\b") +(test-sre (: #\a #\b #\c (look-ahead (: #\a #\b #\c #\d #\e)) (look-ahead (: #\a #\b))) "abc(?=abcde)(?=ab)") +(test-sre (: (look-behind (: #\a #\b #\c)) #\1 #\2 #\3) "(?<=abc)123") +(test-sre (: (or bow eow) #\a #\b #\c (or bow eow)) "\\babc\\b") +(test-sre (: (look-behind (: #\a #\b #\c)) #\d #\e #\f) "(?<=abc)def") +(test-sre (: #\a #\b #\c (look-behind (: #\b #\c)) #\d #\e #\f) "abc(?<=bc)def") +(test-sre (: (look-behind (: #\a #\b)) #\c #\d #\e #\f) "(?<=ab)cdef") +(test-sre (: #\b (neg-look-behind (: #\a #\x)) (neg-look-ahead (: #\c #\x))) "b(? A (or (: #\t #\o #\m) (: #\b #\o #\n))) #\- (backref A)) "(?tom|bon)-\\k") +(test-sre (: #\X (** 2 4 #\a) #\b) "Xa{2,4}b") +(test-sre (: #\X (**? 2 4 #\a) #\b) "Xa{2,4}?b") +(test-sre (: #\X (+ (** 2 4 #\a)) #\b) "X(?:a{2,4})+b") +(test-sre (: #\X (** 2 4 numeric) #\b) "X\\d{2,4}b") +(test-sre (: #\X (**? 2 4 numeric) #\b) "X\\d{2,4}?b") +(test-sre (: #\X (+ (** 2 4 numeric)) #\b) "X(?:\\d{2,4})+b") +(test-sre (: #\X (** 2 4 (~ numeric)) #\b) "X\\D{2,4}b") +(test-sre (: #\X (**? 
2 4 (~ numeric)) #\b) "X\\D{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (~ numeric))) #\b) "X(?:\\D{2,4})+b") +(test-sre (: #\X (** 2 4 (or #\a #\b #\c)) #\b) "X[abc]{2,4}b") +(test-sre (: #\X (**? 2 4 (or #\a #\b #\c)) #\b) "X[abc]{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (or #\a #\b #\c))) #\b) "X(?:[abc]{2,4})+b") +(test-sre (: #\X (** 2 4 (~ #\a)) #\b) "X[^a]{2,4}b") +(test-sre (: #\X (**? 2 4 (~ #\a)) #\b) "X[^a]{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (~ #\a))) #\b) "X(?:[^a]{2,4})+b") +(test-sre (: #\Z (neg-look-ahead (:))) "Z(?!)") +(test-sre (: #\d #\o #\g (? ($ (: #\s #\b #\o #\d #\y)))) "dog(sbody)?") +(test-sre (: #\d #\o #\g (?? ($ (: #\s #\b #\o #\d #\y)))) "dog(sbody)??") +(test-sre (or (: #\d #\o #\g) (: #\d #\o #\g #\s #\b #\o #\d #\y)) "dog|dogsbody") +(test-sre (or (: #\d #\o #\g #\s #\b #\o #\d #\y) (: #\d #\o #\g)) "dogsbody|dog") +(test-sre (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow)) "\\bthe cat\\b") +(test-sre (: #\a #\b #\c) "abc") +(test-sre (: (look-behind (: #\a #\b #\c)) #\1 #\2 #\3) "(?<=abc)123") +(test-sre (: (or bow eow) #\a #\b #\c (or bow eow)) "\\babc\\b") +(test-sre (: (? #\a) (? #\b)) "a?b?") +(test-sre (: bos (? #\a) (? 
#\b)) "^a?b?") +(test-sre (: #\a #\b #\c (* #\d)) "abcd*") +(test-sre (: #\a #\b #\c (* numeric)) "abc\\d*") +(test-sre (: #\a #\b #\c (* (or #\d #\e))) "abc[de]*") +(test-sre (: (look-behind (: #\a #\b #\c)) #\d #\e #\f) "(?<=abc)def") +(test-sre (: #\a #\b #\c eos) "abc$") +(test-sre (: #\a #\b #\c (or bow eow)) "abc\\b") +(test-sre (: #\a #\b #\c nwb) "abc\\B") +(test-sre (+ nonl) ".+") +(test-sre (: (look-behind (+ ($ (: #\a #\b #\c)))) #\X) "(?<=(abc)+)X") +(test-sre (or (: ($ #\a) #\b) (: #\a #\c)) "(a)b|ac") +(test-sre (or (: ($ #\a) ($ #\b) #\x) (: #\a #\b #\c)) "(a)(b)x|abc") +(test-sre (: (or ($ (: #\f #\o #\o)) ($ (: #\b #\a #\r)) ($ (: #\b #\a #\z))) #\X) "(?:(foo)|(bar)|(baz))X") +(test-sre (or (: ($ (: #\a #\b)) #\x) (: #\a #\b)) "(ab)x|ab") +(test-sre ($ ($ ($ ($ ($ #\a))))) "(((((a)))))") +(test-sre (: (*? #\a) (*? #\b)) "a*?b*?") +(test-sre (: #\a #\b #\c) "abc") +(test-sre (: #\a ($ #\b) #\c) "a(b)c") +(test-sre (or (: ($ #\a) ($ #\b)) ($ #\c)) "(a)(b)|(c)") +(test-sre (or (-> A #\a) (-> A #\b)) "(?a)|(?b)") +(test-sre (: #\a ($ #\b) #\c ($ #\d)) "a(b)c(d)") +(test-sre (: bos #\a #\b #\c) "^abc") +(test-sre (: (* nonl) numeric) ".*\\d") +(test-sre (* ($ (: #\a #\b #\c))) "(abc)*") +(test-sre bos "^") +(test-sre (: (? (: #\a #\b)) (: #\a #\b) (: #\a #\b)) "(?:ab)?abab") +(test-sre (: #\a #\b #\c) "abc") +(test-sre ($ (: #\a #\b #\c #\d)) "(abcd)") +(test-sre (: #\a #\b #\c #\d) "abcd") +(test-sre (: #\a ($ #\b) #\c) "a(b)c") +(test-sre (: #\a (or punct #\b)) "a{p|[b]}") +(test-sre (: #\a (or #\b punct)) "a{[b]|p}") +(test-sre (: #\0 #\b #\space #\2 #\8 #\space #\3 #\f #\space #\2 #\d #\space #\7 #\8 #\space #\2 #\9 #\space #\3 #\a) "0b 28 3f 2d 78 29 3a") +(test-sre (or #\a (: ($ #\b) #\c)) "a|(b)c") +(test-sre (: #\e #\f #\g) "efg") +(test-sre (: #\e #\f #\f) "eff") +(test-sre (: #\e #\f #\f #\g) "effg") +(test-sre (: #\a #\a #\a) "aaa") +(test-sre (neg-look-behind (or (:) (: #\! (neg-look-behind (:))))) "(? 
A (:))) "\\[()\\]{65535}(?)") +(test-sre (look-behind (look-ahead (: nonl (look-behind #\x)))) "(?<=(?=.(?<=x)))") +(test-sre eos "$") +(test-sre (: (? #\newline) eos) "\\Z") +(test-sre (: (neg-look-ahead (or #\a #\b)) (* nonl)) "(?![ab]).*") +(test-sre (: #\a #\b #\c #\d) "abcd") +(test-sre (: #\1 #\2 #\3 #\4 #\5 (look-behind (** 1 256 numeric)) #\X) "12345(?<=\\d{1,256})X") +(test-sre (neg-look-behind (** 9 44965 ($ (= 65054 #\space)))) "(? n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n)) "(?:(?foo)|(?bar))\\k") +(test-sre (: (? #\a) #\b (* (or #\] #\x #\y)) #\c) "a?b[\\]xy]*c") +(test-sre (* #\f) "f*") +(test-sre (: #\f #\o #\o #\*) "foo\\*") +(test-sre (: #\f #\o #\o #\* #\b #\a #\r) "foo\\*bar") +(test-sre (: #\f #\\ #\o #\o) "f\\\\oo") +(test-sre (or #\t #\e #\n) "[ten]") +(test-sre (: #\t (char-range #\a #\g) #\n) "t[a-g]n") +(test-sre (: #\a #\] #\b) "a\\]b") +(test-sre (: #\a (or #\] #\a #\-) #\b) "a[\\]a\\-]b") +(test-sre (: #\a (or #\] #\-) #\b) "a[\\]\\-]b") +(test-sre (: #\a (or #\] (char-range #\a #\z)) #\b) "a[\\]a-z]b") +(test-sre #\] "\\]") +(test-sre (: #\t (or #\! (char-range #\a #\g)) #\n) "t[!a-g]n") +(test-sre (: #\A (char-range #\+ #\0) #\B) "A[+-0]B") +(test-sre (: #\a (char-range #\- #\0) #\z) "a[\\--0]z") +(test-sre (: #\a (or numeric #\.) #\z) "a[\\d.]z") +(test-sre (: #\A nwb #\\ #\C (~ numeric)) "A\\B\\\\C\\D") +(test-sre (: (* #\a) #\b) "a*b") +(test-sre (: #\< (or #\] #\b #\c) #\>) "<[\\]bc]>") +(test-sre (: #\< (~ (or #\] #\b #\c)) #\>) "<[^\\]bc]>") +(test-sre (: (* #\a) (+ #\b) #\c #\+ (or #\d #\e #\f) ($ (: #\a #\b)) #\( #\c #\d #\)) "a*b+c\\+[def](ab)\\(cd\\)") +(test-sre (: #\h #\o #\w nonl #\t #\o #\space #\h #\o #\w #\. 
#\t #\o) "how.to how\\.to") +(test-sre (: bos #\h #\o #\w #\space #\t #\o #\space #\^ #\h #\o #\w #\space #\t #\o) "^how to \\^how to") +(test-sre (: bos #\b #\( #\c bos #\d #\) #\( bos #\e bos #\f #\)) "^b\\(c^d\\)\\(^e^f\\)") +(test-sre (: #\[ ($ (:)) (= 65535 #\]) ($ (:))) "\\[()\\]{65535}()") +(test-sre (: bos #\A) "^A") +(test-sre (: bos (+ (or alnum #\_))) "^\\w+") +(test-sre (: ($ (+ nonl)) (or bow eow) ($ (+ nonl))) "(.+)\\b(.+)") +(test-sre (+ (~ (or alnum #\_))) "\\W+") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (: #\a nonl #\b) "a.b") +(test-sre (: #\a ($ (= 3 nonl)) #\b) "a(.{3})b") +(test-sre (: #\a ($ (*? nonl)) ($ nonl)) "a(.*?)(.)") +(test-sre (: #\a ($ (*? nonl)) ($ nonl)) "a(.*?)(.)") +(test-sre (: #\a ($ (* nonl)) ($ nonl)) "a(.*)(.)") +(test-sre (: #\a ($ (* nonl)) ($ nonl)) "a(.*)(.)") +(test-sre (: #\a ($ nonl) ($ nonl)) "a(.)(.)") +(test-sre (: #\a ($ nonl) ($ nonl)) "a(.)(.)") +(test-sre (: #\a ($ (? nonl)) ($ nonl)) "a(.?)(.)") +(test-sre (: #\a ($ (? nonl)) ($ nonl)) "a(.?)(.)") +(test-sre (: #\a ($ (?? nonl)) ($ nonl)) "a(.??)(.)") +(test-sre (: #\a ($ (?? nonl)) ($ nonl)) "a(.??)(.)") +(test-sre (: #\a ($ (= 3 nonl)) #\b) "a(.{3})b") +(test-sre (: #\a ($ (>= 3 nonl)) #\b) "a(.{3,})b") +(test-sre (: #\a ($ (**? 3 #f nonl)) #\b) "a(.{3,}?)b") +(test-sre (: #\a ($ (** 3 5 nonl)) #\b) "a(.{3,5})b") +(test-sre (: #\a ($ (**? 3 5 nonl)) #\b) "a(.{3,5}?)b") +(test-sre (: (look-behind (: #\a #\X #\b)) #\c #\d) "(?<=aXb)cd") +(test-sre (: (look-behind ($ nonl)) #\X) "(?<=(.))X") +(test-sre (+ (~ #\a)) "[^a]+") +(test-sre (: bos (= 2 (~ #\a))) "^[^a]{2}") +(test-sre (: bos (>= 2 (~ #\a))) "^[^a]{2,}") +(test-sre (: bos (**? 2 #f (~ #\a))) "^[^a]{2,}?") +(test-sre (+ (~ #\a)) "[^a]+") +(test-sre (: bos (= 2 (~ #\a))) "^[^a]{2}") +(test-sre (: bos (>= 2 (~ #\a))) "^[^a]{2,}") +(test-sre (: bos (**? 
2 #f (~ #\a))) "^[^a]{2,}?") +(test-sre (* (~ numeric)) "\\D*") +(test-sre (* (~ numeric)) "\\D*") +(test-sre (~ numeric) "\\D") +(test-sre (: #\> (~ space)) ">\\S") +(test-sre numeric "\\d") +(test-sre space "\\s") +(test-sre (+ (~ numeric)) "\\D+") +(test-sre (** 2 3 (~ numeric)) "\\D{2,3}") +(test-sre (**? 2 3 (~ numeric)) "\\D{2,3}?") +(test-sre (+ numeric) "\\d+") +(test-sre (** 2 3 numeric) "\\d{2,3}") +(test-sre (**? 2 3 numeric) "\\d{2,3}?") +(test-sre (+ (~ space)) "\\S+") +(test-sre (** 2 3 (~ space)) "\\S{2,3}") +(test-sre (**? 2 3 (~ space)) "\\S{2,3}?") +(test-sre (: #\> (+ space) #\<) ">\\s+<") +(test-sre (: #\> (** 2 3 space) #\<) ">\\s{2,3}<") +(test-sre (: #\> (**? 2 3 space) #\<) ">\\s{2,3}?<") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (** 2 3 (or alnum #\_)) "\\w{2,3}") +(test-sre (**? 2 3 (or alnum #\_)) "\\w{2,3}?") +(test-sre (+ (~ (or alnum #\_))) "\\W+") +(test-sre (** 2 3 (~ (or alnum #\_))) "\\W{2,3}") +(test-sre (**? 2 3 (~ (or alnum #\_))) "\\W{2,3}?") +(test-sre (: bos (* (or #\a #\c)) #\b) "^[ac]*b") +(test-sre (: bos (* (~ #\x)) #\b) "^[^x]*b") +(test-sre (: bos (* (~ #\x)) #\b) "^[^x]*b") +(test-sre (: bos (* numeric) #\b) "^\\d*b") +(test-sre ($ (or (:) #\a)) "(|a)") +(test-sre (: (~ space) (~ space)) "\\S\\S") +(test-sre (= 2 (~ space)) "\\S{2}") +(test-sre (: (~ (or alnum #\_)) (~ (or alnum #\_))) "\\W\\W") +(test-sre (= 2 (~ (or alnum #\_))) "\\W{2}") +(test-sre (~ space) "\\S") +(test-sre (~ numeric) "\\D") +(test-sre (~ (or alnum #\_)) "\\W") +(test-sre (: nonl (~ (or (~ space) #\newline)) nonl) ".[^\\S\n].") +(test-sre (: bos (*? (~ #\d)) eos) "^[^d]*?$") +(test-sre (: bos (*? (~ #\d)) eos) "^[^d]*?$") +(test-sre (: bos (*? 
(~ #\d)) eos) "^[^d]*?$") +(test-sre (* #\A) "A*") +(test-sre nonl ".") +(test-sre (: bos (* numeric) (= 4 (or alnum #\_))) "^\\d*\\w{4}") +(test-sre (: bos (* (~ #\b)) (= 4 (or alnum #\_))) "^[^b]*\\w{4}") +(test-sre (: bos (* (~ #\b)) (= 4 (or alnum #\_))) "^[^b]*\\w{4}") +(test-sre (: bos nonl nwb nonl nwb nonl) "^.\\B.\\B.") +(test-sre (+ (~ numeric)) "\\D+") +(test-sre (: bos (+ (or alnum #\_))) "^\\w+") +(test-sre (: bos (+ numeric)) "^\\d+") +(test-sre (: bos #\> (+ space)) "^>\\s+") +(test-sre (: bos #\A (+ space) #\Z) "^A\\s+Z") +(test-sre (+ (or #\R #\S #\T)) "[RST]+") +(test-sre (+ (char-range #\R #\T)) "[R-T]+") +(test-sre (+ (char-range #\q #\u)) "[q-u]+") +(test-sre (: bos (? #\s) #\c) "^s?c") +(test-sre (char-range #\A #\`) "[A-`]") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (: (or bow eow) (**? 1 #f nonl) (or bow eow)) "\\b.+?\\b") +(test-sre (: #\c #\a #\f nwb (**? 1 #f nonl) nwb) "caf\\B.+?\\B") +(test-sre (: #\c #\3 #\space #\b #\1) "c3 b1") +(test-sre (: bos #\A (+ space) #\Z) "^A\\s+Z") +(test-sre (~ (or alnum #\_)) "\\W") +(test-sre (or alnum #\_) "\\w") +(test-sre (: #\X (** 2 4 #\a) #\b) "Xa{2,4}b") +(test-sre (: #\X (**? 2 4 #\a) #\b) "Xa{2,4}?b") +(test-sre (: #\X (+ (** 2 4 #\a)) #\b) "X(?:a{2,4})+b") +(test-sre (: #\X (** 2 4 numeric) #\b) "X\\d{2,4}b") +(test-sre (: #\X (**? 2 4 numeric) #\b) "X\\d{2,4}?b") +(test-sre (: #\X (+ (** 2 4 numeric)) #\b) "X(?:\\d{2,4})+b") +(test-sre (: #\X (** 2 4 (~ numeric)) #\b) "X\\D{2,4}b") +(test-sre (: #\X (**? 2 4 (~ numeric)) #\b) "X\\D{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (~ numeric))) #\b) "X(?:\\D{2,4})+b") +(test-sre (: #\X (** 2 4 (~ numeric)) #\b) "X\\D{2,4}b") +(test-sre (: #\X (**? 2 4 (~ numeric)) #\b) "X\\D{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (~ numeric))) #\b) "X(?:\\D{2,4})+b") +(test-sre (: #\X (** 2 4 (or #\a #\b #\c)) #\b) "X[abc]{2,4}b") +(test-sre (: #\X (**? 
2 4 (or #\a #\b #\c)) #\b) "X[abc]{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (or #\a #\b #\c))) #\b) "X(?:[abc]{2,4})+b") +(test-sre (: #\X (** 2 4 (~ #\a)) #\b) "X[^a]{2,4}b") +(test-sre (: #\X (**? 2 4 (~ #\a)) #\b) "X[^a]{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (~ #\a))) #\b) "X(?:[^a]{2,4})+b") +(test-sre (: #\X (** 2 4 (~ #\a)) #\b) "X[^a]{2,4}b") +(test-sre (: #\X (**? 2 4 (~ #\a)) #\b) "X[^a]{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (~ #\a))) #\b) "X(?:[^a]{2,4})+b") +(test-sre (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow)) "\\bthe cat\\b") +(test-sre (: #\a #\b #\c (* #\d)) "abcd*") +(test-sre (: #\a #\b #\c (* #\d)) "abcd*") +(test-sre (: #\a #\b #\c (* numeric)) "abc\\d*") +(test-sre (: #\a #\b #\c (* (or #\d #\e))) "abc[de]*") +(test-sre (: #\X (= 3 (~ (or alnum #\_))) #\X) "X\\W{3}X") +(test-sre (: #\f (* nonl)) "f.*") +(test-sre (: #\f (* nonl)) "f.*") +(test-sre (: #\f (* nonl)) "f.*") +(test-sre (: #\f (* nonl)) "f.*") +(test-sre (: (neg-look-behind bos) #\E #\T #\A) "(? A (: #\s #\s)) (-> A (: #\k #\k))) #\space (backref A)) "(?:(?ss)|(?kk)) \\k") +(test-sre (: (or (-> A #\s) (-> A #\k)) #\space (>= 3 (backref A)) #\!) "(?:(?s)|(?k)) \\k{3,}!") +(test-sre #\i "i") +(test-sre #\I "I") +(test-sre #\i "i") +(test-sre (~ #\i) "[^i]") +(test-sre (or #\z #\i) "[zi]") +(test-sre (or #\i #\I) "[iI]") +(test-sre (+ numeric) "\\d+") +(test-sre (+ numeric) "\\d+") +(test-sre (: #\> (+ space) #\<) ">\\s+<") +(test-sre (: #\> (+ space) #\<) ">\\s+<") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (: (or bow eow) #\A #\B #\C (or bow eow)) "\\bABC\\b") +(test-sre (: (or bow eow) #\A #\B #\C (or bow eow)) "\\bABC\\b") +(test-sre (neg-look-behind ($ (or (:) (: #\l #\space)))) "(?= 2 #\b) #\x (** 4 5 #\y) #\p (** 0 6 #\q) #\A (>= 0 #\B) #\z #\z) "a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB*zz") +(test-sre (: bos (** 1 2 ($ (: #\a #\b #\c))) #\z #\z) "^(abc){1,2}zz") +(test-sre (: bos (**? 1 2 ($ (or (**? 
1 #f #\b) #\a))) #\c) "^(b+?|a){1,2}?c") +(test-sre (: bos (** 1 2 ($ (or (+ #\b) #\a))) #\c) "^(b+|a){1,2}c") +(test-sre (: bos (**? 1 2 ($ (or (+ #\b) #\a))) #\b #\c) "^(b+|a){1,2}?bc") +(test-sre (: bos (**? 1 2 ($ (or (* #\b) (: #\b #\a)))) #\b #\c) "^(b*|ba){1,2}?bc") +(test-sre (: bos (**? 1 2 ($ (or (: #\b #\a) (* #\b)))) #\b #\c) "^(ba|b*){1,2}?bc") +(test-sre (: bos (or #\a #\b #\] #\c #\d #\e)) "^[ab\\]cde]") +(test-sre (: bos (or #\] #\c #\d #\e)) "^[\\]cde]") +(test-sre (: bos (~ (or #\a #\b #\] #\c #\d #\e))) "^[^ab\\]cde]") +(test-sre (: bos (~ (or #\] #\c #\d #\e))) "^[^\\]cde]") +(test-sre (: bos #\@) "^@") +(test-sre (: bos (+ (char-range #\0 #\9)) eos) "^[0-9]+$") +(test-sre (: bos (* nonl) #\n #\t #\e #\r) "^.*nter") +(test-sre (: bos #\x #\x #\x (+ (char-range #\0 #\9)) eos) "^xxx[0-9]+$") +(test-sre (: bos (+ nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos) "^.+[0-9][0-9][0-9]$") +(test-sre (: bos (**? 1 #f nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos) "^.+?[0-9][0-9][0-9]$") +(test-sre (: bos ($ (+ (~ #\!))) #\! ($ (+ nonl)) #\= #\a #\p #\q #\u #\x #\z #\. #\i #\x #\r #\. #\z #\z #\z #\. #\a #\c #\. #\u #\k eos) "^([^!]+)!(.+)=apquxz\\.ixr\\.zzz\\.ac\\.uk$") +(test-sre #\: ":") +(test-sre (: bos (* nonl) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) eos) "^.*\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$") +(test-sre (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos) "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$") +(test-sre (: bos (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-)) (* ($ (: #\. (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-))))) #\. 
eos) "^[a-zA-Z\\d][a-zA-Z\\d\\-]*(\\.[a-zA-Z\\d][a-zA-Z\\d\\-]*)*\\.$") +(test-sre (: bos #\* #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric))))) (* ($ (: #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric)))))))) eos) "^\\*\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?(\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?)*$") +(test-sre (: bos (look-ahead (: #\a #\b ($ (: #\d #\e)))) ($ (: #\a #\b #\d)) ($ #\e)) "^(?=ab(de))(abd)(e)") +(test-sre (: bos (neg-look-ahead (or (: ($ (: #\a #\b)) #\d #\e) #\x)) ($ (: #\a #\b #\d)) ($ #\f)) "^(?!(ab)de|x)(abd)(f)") +(test-sre (: bos (look-ahead ($ (: #\a #\b ($ (: #\c #\d))))) ($ (: #\a #\b))) "^(?=(ab(cd)))(ab)") +(test-sre (: bos eos) "^$") +(test-sre (: bos #\a #\space #\b (or #\c #\space) #\d eos) "^a b[c ]d$") +(test-sre (: bos ($ (: #\a ($ (: #\b ($ #\c))))) ($ (: #\d ($ (: #\e ($ #\f))))) ($ (: #\h ($ (: #\i ($ #\j))))) ($ (: #\k ($ (: #\l ($ #\m))))) eos) "^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$") +(test-sre (: bos (: #\a ($ (: #\b ($ #\c)))) (: #\d ($ (: #\e ($ #\f)))) (: #\h ($ (: #\i ($ #\j)))) (: #\k ($ (: #\l ($ #\m)))) eos) "^a(b(c))d(e(f))h(i(j))k(l(m))$") +(test-sre (: bos (+ (or #\. #\^ #\$ #\| #\( #\) #\* #\+ #\? #\{ #\, #\}))) "^[.\\^$|()*+?{,}]+") +(test-sre (: bos (* #\a) (or alnum #\_)) "^a*\\w") +(test-sre (: bos (*? #\a) (or alnum #\_)) "^a*?\\w") +(test-sre (: bos (+ #\a) (or alnum #\_)) "^a+\\w") +(test-sre (: bos (**? 1 #f #\a) (or alnum #\_)) "^a+?\\w") +(test-sre (: bos (= 8 numeric) (>= 2 (or alnum #\_))) "^\\d{8}\\w{2,}") +(test-sre (: bos (** 4 5 (or #\a #\e #\i #\o #\u numeric)) eos) "^[aeiou\\d]{4,5}$") +(test-sre (: bos (**? 
4 5 (or #\a #\e #\i #\o #\u numeric))) "^[aeiou\\d]{4,5}?") +(test-sre (: bos #\1 #\2 nonl #\3 #\4) "^12.34") +(test-sre (: #\f #\o #\o (neg-look-ahead (: #\b #\a #\r)) ($ (* nonl))) "foo(?!bar)(.*)") +(test-sre (: (or (: (neg-look-ahead (: #\f #\o #\o)) nonl nonl nonl) (: bos (** 0 2 nonl))) #\b #\a #\r ($ (* nonl))) "(?:(?!foo)...|^.{0,2})bar(.*)") +(test-sre (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3))) "^(\\D*)(?=\\d)(?!123)") +(test-sre (: (neg-look-ahead bos) #\a #\b #\c) "{!= 8 numeric) #\@ (+ nonl) (~ #\k) eos) "^\\d{8,}@.+[^k]$") +(test-sre (~ #\a) "[^a]") +(test-sre (~ (or #\a #\z)) "[^az]") +(test-sre (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 6 (~ #\*)) #\L #\L) "P[^*]TAIRE[^*]{1,6}?LL") +(test-sre (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 #f (~ #\*)) #\L #\L) "P[^*]TAIRE[^*]+?LL") +(test-sre (: ($ (: #\. numeric numeric (? (char-range #\1 #\9)))) (+ numeric)) "(\\.\\d\\d[1-9]?)\\d+") +(test-sre ($ (: #\. numeric numeric ($ (or (look-ahead #\0) (: numeric (look-ahead numeric)))))) "(\\.\\d\\d((?=0)|\\d(?=\\d)))") +(test-sre (: #\f #\o #\o ($ (* nonl)) #\b #\a #\r) "foo(.*)bar") +(test-sre (: #\f #\o #\o ($ (*? nonl)) #\b #\a #\r) "foo(.*?)bar") +(test-sre (: ($ (* nonl)) ($ (+ numeric))) "(.*)(\\d+)") +(test-sre (: ($ (*? nonl)) ($ (+ numeric))) "(.*?)(\\d+)") +(test-sre (: ($ (* nonl)) ($ (+ numeric)) eos) "(.*)(\\d+)$") +(test-sre (: ($ (*? 
nonl)) ($ (+ numeric)) eos) "(.*?)(\\d+)$") +(test-sre (: ($ (* nonl)) (or bow eow) ($ (+ numeric)) eos) "(.*)\\b(\\d+)$") +(test-sre (: ($ (: (* nonl) (~ numeric))) ($ (+ numeric)) eos) "(.*\\D)(\\d+)$") +(test-sre (: bos (* (~ numeric)) (neg-look-ahead (: #\1 #\2 #\3))) "^\\D*(?!123)") +(test-sre (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3))) "^(\\D*)(?=\\d)(?!123)") +(test-sre (: bos (or (char-range #\W #\]) #\4 #\6)) "^[W-\\]46]") +(test-sre (: #\w #\o #\r #\d #\space (** 0 10 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d) "word (?:[a-zA-Z0-9]+ ){0,10}otherword") +(test-sre (: #\w #\o #\r #\d #\space (** 0 300 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d) "word (?:[a-zA-Z0-9]+ ){0,300}otherword") +(test-sre (: bos (= 0 ($ #\a))) "^(a){0}") +(test-sre (: bos (** 0 1 ($ #\a))) "^(a)?") +(test-sre (: bos (** 0 2 ($ #\a))) "^(a){0,2}") +(test-sre (: bos (** 0 3 ($ #\a))) "^(a){0,3}") +(test-sre (: bos (>= 0 ($ #\a))) "^(a)*") +(test-sre (: bos (= 1 ($ #\a))) "^(a)") +(test-sre (: bos (** 1 2 ($ #\a))) "^(a){1,2}") +(test-sre (: bos (** 1 3 ($ #\a))) "^(a){1,3}") +(test-sre (: bos (>= 1 ($ #\a))) "^(a)+") +(test-sre (: (* nonl) #\. #\g #\i #\f) ".*\\.gif") +(test-sre (: (>= 0 nonl) #\. #\g #\i #\f) ".*\\.gif") +(test-sre (: (* nonl) eos) ".*$") +(test-sre ($ (or (: (* nonl) #\X) (: bos #\B))) "(.*X|^B)") +(test-sre (: bos (* nonl) #\B) "^.*B") +(test-sre (: bol (* nonl) #\B) "{= 0 #\b) #\b #\c) "ab*bc") +(test-sre (: #\a (+ #\b) #\b #\c) "ab+bc") +(test-sre (: #\a (+ #\b) #\b #\c) "ab+bc") +(test-sre (: #\a (>= 1 #\b) #\b #\c) "ab+bc") +(test-sre (: #\a (** 1 3 #\b) #\b #\c) "ab{1,3}bc") +(test-sre (: #\a (** 3 4 #\b) #\b #\c) "ab{3,4}bc") +(test-sre (: #\a (** 4 5 #\b) #\b #\c) "ab{4,5}bc") +(test-sre (: #\a (? 
#\b) #\b #\c) "ab?bc") +(test-sre (: #\a (** 0 1 #\b) #\b #\c) "ab?bc") +(test-sre (: #\a (? #\b) #\b #\c) "ab?bc") +(test-sre (: #\a (? #\b) #\c) "ab?c") +(test-sre (: #\a (** 0 1 #\b) #\c) "ab?c") +(test-sre (: bos #\a #\b #\c eos) "^abc$") +(test-sre (: bos #\a #\b #\c) "^abc") +(test-sre (: bos #\a #\b #\c eos) "^abc$") +(test-sre (: #\a #\b #\c eos) "abc$") +(test-sre bos "^") +(test-sre eos "$") +(test-sre (: #\a nonl #\c) "a.c") +(test-sre (: #\a (* nonl) #\c) "a.*c") +(test-sre (: #\a (or #\b #\c) #\d) "a[bc]d") +(test-sre (: #\a (char-range #\b #\d) #\e) "a[b-d]e") +(test-sre (: #\a (char-range #\b #\d)) "a[b-d]") +(test-sre (: #\a (or #\- #\b)) "a[\\-b]") +(test-sre (: #\a (or #\b #\-)) "a[b\\-]") +(test-sre (: #\a #\] #\b) "a\\]b") +(test-sre (: #\a (~ (or #\b #\c)) #\d) "a[^bc]d") +(test-sre (: #\a (~ (or #\- #\b)) #\c) "a[^\\-b]c") +(test-sre (: #\a (~ (or #\] #\b)) #\c) "a[^\\]b]c") +(test-sre (: (or bow eow) #\a (or bow eow)) "\\ba\\b") +(test-sre (: (or bow eow) #\y (or bow eow)) "\\by\\b") +(test-sre (: nwb #\a nwb) "\\Ba\\B") +(test-sre (: nwb #\y (or bow eow)) "\\By\\b") +(test-sre (: (or bow eow) #\y nwb) "\\by\\B") +(test-sre (: nwb #\y nwb) "\\By\\B") +(test-sre (or alnum #\_) "\\w") +(test-sre (~ (or alnum #\_)) "\\W") +(test-sre (: #\a space #\b) "a\\sb") +(test-sre (: #\a (~ space) #\b) "a\\Sb") +(test-sre numeric "\\d") +(test-sre (~ numeric) "\\D") +(test-sre (or (: #\a #\b) (: #\c #\d)) "ab|cd") +(test-sre (: ($ (:)) #\e #\f) "()ef") +(test-sre (: eos #\b) "$b") +(test-sre (: #\a #\( #\b) "a\\(b") +(test-sre (: #\a (* #\() #\b) "a\\(*b") +(test-sre (: #\a #\\ #\b) "a\\\\b") +(test-sre ($ ($ #\a)) "((a))") +(test-sre (: ($ #\a) #\b ($ #\c)) "(a)b(c)") +(test-sre (: (+ #\a) (+ #\b) #\c) "a+b+c") +(test-sre (: (>= 1 #\a) (>= 1 #\b) #\c) "a+b+c") +(test-sre (: #\a (**? 
1 #f nonl) #\c) "a.+?c") +(test-sre (* ($ (or (+ #\a) #\b))) "(a+|b)*") +(test-sre (>= 0 ($ (or (+ #\a) #\b))) "(a+|b)*") +(test-sre (+ ($ (or (+ #\a) #\b))) "(a+|b)+") +(test-sre (>= 1 ($ (or (+ #\a) #\b))) "(a+|b)+") +(test-sre (? ($ (or (+ #\a) #\b))) "(a+|b)?") +(test-sre (** 0 1 ($ (or (+ #\a) #\b))) "(a+|b)?") +(test-sre (* (~ (or #\a #\b))) "[^ab]*") +(test-sre (: #\a #\b #\c) "abc") +(test-sre (* #\a) "a*") +(test-sre (: (* ($ (or #\a #\b #\c))) #\d) "([abc])*d") +(test-sre (: (* ($ (or #\a #\b #\c))) #\b #\c #\d) "([abc])*bcd") +(test-sre (or #\a #\b #\c #\d #\e) "[abcde]") +(test-sre (: ($ (or #\a #\b #\c #\d #\e)) #\f) "([abcde])f") +(test-sre (: #\a #\b #\c (* #\d) #\e #\f #\g) "abcd*efg") +(test-sre (: #\a (* #\b)) "ab*") +(test-sre (: ($ (or (: #\a #\b) (: #\c #\d))) #\e) "(ab|cd)e") +(test-sre (: (or #\a #\b #\h #\g #\e #\f #\d #\c) #\i #\j) "[abhgefdc]ij") +(test-sre (: bos ($ (or (: #\a #\b) (: #\c #\d))) #\e) "^(ab|cd)e") +(test-sre (: ($ (or (: #\a #\b #\c) (:))) #\e #\f) "(abc|)ef") +(test-sre (: ($ (or #\a #\b)) (* #\c) #\d) "([ab])c*d") +(test-sre (: ($ (or (: #\a #\b) (: #\a (* #\b)))) #\b #\c) "(ab|ab*)bc") +(test-sre (: #\a ($ (* (or #\b #\c))) (* #\c)) "a([bc]*)c*") +(test-sre (: #\a ($ (* (or #\b #\c))) ($ (: (* #\c) #\d))) "a([bc]*)(c*d)") +(test-sre (: #\a ($ (+ (or #\b #\c))) ($ (: (* #\c) #\d))) "a([bc]+)(c*d)") +(test-sre (: #\a ($ (* (or #\b #\c))) ($ (: (+ #\c) #\d))) "a([bc]*)(c+d)") +(test-sre (: #\a (* (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e) "a[bcd]*dcdcde") +(test-sre (: #\a (+ (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e) "a[bcd]+dcdcde") +(test-sre (: ($ (or (: #\a #\b) #\a)) (* #\b) #\c) "(ab|a)b*c") +(test-sre (: ($ (: ($ #\a) ($ #\b) #\c)) ($ #\d)) "((a)(b)c)(d)") +(test-sre (: (or (char-range #\a #\z) (char-range #\A #\Z) #\_) (* (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9) #\_))) "[a-zA-Z_][a-zA-Z0-9_]*") +(test-sre (or (: bos #\a ($ (or (: #\b (+ #\c)) (: #\b (or #\e #\h)))) #\g) (: nonl #\h eos)) 
"^a(bc+|b[eh])g|.h$") +(test-sre ($ (or (: #\b (+ #\c) #\d eos) (: #\e (* #\f) #\g nonl) (: (? #\h) #\i ($ (or #\j #\k))))) "(bc+d$|ef*g.|h?i([jk]))") +(test-sre ($ ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a)))))))))) "((((((((((a))))))))))") +(test-sre ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a))))))))) "(((((((((a)))))))))") +(test-sre (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s #\space #\o #\f #\space #\t #\e #\x #\t) "multiple words of text") +(test-sre (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s) "multiple words") +(test-sre (: ($ (* nonl)) #\c ($ (* nonl))) "(.*)c(.*)") +(test-sre (: #\( ($ (* nonl)) #\, #\space ($ (* nonl)) #\)) "\\((.*), (.*)\\)") +(test-sre #\k "k") +(test-sre (: #\a #\b #\c #\d) "abcd") +(test-sre (: #\a ($ (: #\b #\c)) #\d) "a(bc)d") +(test-sre (: #\a (? #\-) #\c) "a-?c") +(test-sre (: #\a (neg-look-ahead #\b) nonl) "a(?!b).") +(test-sre (: #\a (look-ahead #\d) nonl) "a(?=d).") +(test-sre (: #\a (look-ahead (or #\c #\d)) nonl) "a(?=[cd]).") +(test-sre (: #\a (or #\b #\c #\d) ($ nonl)) "a[bcd](.)") +(test-sre (: #\a (* (or #\b #\c #\d)) ($ nonl)) "a[bcd]*(.)") +(test-sre (: #\a (**? 1 #f (or #\b #\c #\d)) ($ nonl)) "a[bcd]+?(.)") +(test-sre (: #\a (+ (or #\b #\c #\d)) ($ nonl)) "a[bcd]+(.)") +(test-sre (: #\a (= 2 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{2}(.)") +(test-sre (: #\a (** 4 5 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{4,5}(.)") +(test-sre (: #\a (**? 4 5 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{4,5}?(.)") +(test-sre (* ($ (or ($ (: #\f #\o #\o)) ($ (: #\b #\a #\r))))) "((foo)|(bar))*") +(test-sre (: #\a (** 6 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{6,7}(.)") +(test-sre (: #\a (**? 6 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{6,7}?(.)") +(test-sre (: #\a (** 5 6 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,6}(.)") +(test-sre (: #\a (**? 5 6 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,6}?(.)") +(test-sre (: #\a (** 5 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,7}(.)") +(test-sre (: #\a (**? 
5 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,7}?(.)") +(test-sre (: #\a (**? 1 #f (or #\b (**? 1 2 ($ (or #\c #\e))) #\d)) ($ nonl)) "a(?:b|([ce]){1,2}?|d)+?(.)") +(test-sre (: bos (? ($ (+ nonl))) #\B) "^(.+)?B") +(test-sre (or (: bos ($ (~ (char-range #\a #\z)))) (: ($ #\^) eos)) "^([^a-z])|(\\^)$") +(test-sre (: bos (or #\< #\>) #\&) "^[<>]&") +(test-sre (* (or (: ($ #\f) ($ #\o) ($ #\o)) (: ($ #\b) ($ #\a) ($ #\r)))) "(?:(f)(o)(o)|(b)(a)(r))*") +(test-sre (: (look-behind #\a) #\b) "(?<=a)b") +(test-sre (: (neg-look-behind #\c) #\b) "(? (+ #\a))) #\a #\b) "(>a+)ab") +(test-sre (: #\a eos) "a$") +(test-sre (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)))) #\f #\o #\o) "(?<=\\d{3}(?!999))foo") +(test-sre (: (look-behind (: (neg-look-ahead (: nonl nonl nonl #\9 #\9 #\9)) (= 3 numeric))) #\f #\o #\o) "(?<=(?!...999)\\d{3})foo") +(test-sre (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)) nonl nonl nonl)) #\f #\o #\o) "(?<=\\d{3}(?!999)...)foo") +(test-sre (: (look-behind (: (= 3 numeric) nonl nonl nonl)) (neg-look-behind (: #\9 #\9 #\9)) #\f #\o #\o) "(?<=\\d{3}...)(?= 2 (or #\a #\b)) "[ab]{2,}") +(test-sre (**? 2 #f (or #\a #\b)) "[ab]{2,}?") +(test-sre (: #\a #\b #\c (look-ahead (: #\x #\y #\z))) "abc(?=xyz)") +(test-sre (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z))) "(?<=pqr)abc(?=xyz)") +(test-sre (: #\a (or bow eow)) "a\\b") +(test-sre (: #\a #\b #\c (look-ahead (: #\a #\b #\c #\d #\e)) (look-ahead (: #\a #\b))) "abc(?=abcde)(?=ab)") +(test-sre (: (*? #\a) (*? #\b)) "a*?b*?") +(test-sre (or (: ($ #\a) ($ #\b)) ($ #\c)) "(a)(b)|(c)") +(test-sre (-> A (: #\a #\a)) "(?aa)") +(test-sre (: #\a ($ #\b) #\c ($ #\d)) "a(b)c(d)") +(test-sre bos "^") +(test-sre (: (? 
($ (: #\0 #\2 #\-))) (= 3 (char-range #\0 #\9)) #\- (= 3 (char-range #\0 #\9))) "(02-)?[0-9]{3}-[0-9]{3}") +(test-sre (: #\a #\b #\c) "abc") +(test-sre (: #\a #\b #\c) "abc") +(test-sre (or (: #\a #\b #\c) (: #\b #\c #\d)) "abc|bcd") +(test-sre (look-behind (or (: #\a #\b #\c) (:))) "(?<=abc|)") +(test-sre (look-behind (or (: #\a #\b #\c) (:))) "(?<=abc|)") +(test-sre (look-behind (or (:) (: #\a #\b #\c))) "(?<=|abc)") +(test-sre (or #\a #\b #\c) "[abc]") +(test-sre (: #\f #\o #\o #\b #\a #\r) "foobar") +(test-sre (: #\f #\o #\o #\b #\a #\r) "foobar") +(test-sre (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z))) "(?<=pqr)abc(?=xyz)") +(test-sre eos "$") +(test-sre (: (? #\newline) eos) "\\Z") +(test-sre (look-behind (look-ahead (: nonl (look-behind #\x)))) "(?<=(?=.(?<=x)))") +(test-sre (: (neg-look-ahead (or #\a #\b)) (* nonl)) "(?![ab]).*") +(test-sre (or #\a #\[) "[a\\[]") +(test-sre (: (or bow eow) #\X) "\\bX") +(test-sre (: nwb #\X) "\\BX") +(test-sre (: #\X (or bow eow)) "X\\b") +(test-sre (: #\X nwb) "X\\B") +(test-sre (~ #\a) "[^a]") +(test-sre (: #\a nonl #\b) "a.b") +(test-sre (: #\a ($ (= 3 nonl)) #\b) "a(.{3})b") +(test-sre (: #\a ($ (*? nonl)) ($ nonl)) "a(.*?)(.)") +(test-sre (: #\a ($ (*? nonl)) ($ nonl)) "a(.*?)(.)") +(test-sre (: #\a ($ (* nonl)) ($ nonl)) "a(.*)(.)") +(test-sre (: #\a ($ (* nonl)) ($ nonl)) "a(.*)(.)") +(test-sre (: #\a ($ nonl) ($ nonl)) "a(.)(.)") +(test-sre (: #\a ($ nonl) ($ nonl)) "a(.)(.)") +(test-sre (: #\a ($ (? nonl)) ($ nonl)) "a(.?)(.)") +(test-sre (: #\a ($ (? nonl)) ($ nonl)) "a(.?)(.)") +(test-sre (: #\a ($ (?? nonl)) ($ nonl)) "a(.??)(.)") +(test-sre (: #\a ($ (?? nonl)) ($ nonl)) "a(.??)(.)") +(test-sre (: #\a ($ (= 3 nonl)) #\b) "a(.{3})b") +(test-sre (: #\a ($ (>= 3 nonl)) #\b) "a(.{3,})b") +(test-sre (: #\a ($ (**? 3 #f nonl)) #\b) "a(.{3,}?)b") +(test-sre (: #\a ($ (** 3 5 nonl)) #\b) "a(.{3,5})b") +(test-sre (: #\a ($ (**? 
3 5 nonl)) #\b) "a(.{3,5}?)b") +(test-sre (: (look-behind (: #\a #\X #\b)) #\c #\d) "(?<=aXb)cd") +(test-sre (: (look-behind ($ nonl)) #\X) "(?<=(.))X") +(test-sre (+ (~ #\a)) "[^a]+") +(test-sre (: bos (= 2 (~ #\a))) "^[^a]{2}") +(test-sre (: bos (>= 2 (~ #\a))) "^[^a]{2,}") +(test-sre (: bos (**? 2 #f (~ #\a))) "^[^a]{2,}?") +(test-sre (+ (~ #\a)) "[^a]+") +(test-sre (: bos (= 2 (~ #\a))) "^[^a]{2}") +(test-sre (: bos (>= 2 (~ #\a))) "^[^a]{2,}") +(test-sre (: bos (**? 2 #f (~ #\a))) "^[^a]{2,}?") +(test-sre (~ numeric) "\\D") +(test-sre (: #\> (~ space)) ">\\S") +(test-sre numeric "\\d") +(test-sre space "\\s") +(test-sre (+ (~ numeric)) "\\D+") +(test-sre (** 2 3 (~ numeric)) "\\D{2,3}") +(test-sre (**? 2 3 (~ numeric)) "\\D{2,3}?") +(test-sre (+ numeric) "\\d+") +(test-sre (** 2 3 numeric) "\\d{2,3}") +(test-sre (**? 2 3 numeric) "\\d{2,3}?") +(test-sre (+ (~ space)) "\\S+") +(test-sre (** 2 3 (~ space)) "\\S{2,3}") +(test-sre (**? 2 3 (~ space)) "\\S{2,3}?") +(test-sre (: #\> (+ space) #\<) ">\\s+<") +(test-sre (: #\> (** 2 3 space) #\<) ">\\s{2,3}<") +(test-sre (: #\> (**? 2 3 space) #\<) ">\\s{2,3}?<") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (** 2 3 (or alnum #\_)) "\\w{2,3}") +(test-sre (**? 2 3 (or alnum #\_)) "\\w{2,3}?") +(test-sre (+ (~ (or alnum #\_))) "\\W+") +(test-sre (** 2 3 (~ (or alnum #\_))) "\\W{2,3}") +(test-sre (**? 
2 3 (~ (or alnum #\_))) "\\W{2,3}?") +(test-sre (: bos (* (or #\a #\c)) #\b) "^[ac]*b") +(test-sre (: bos (* (~ #\x)) #\b) "^[^x]*b") +(test-sre (: bos (* (~ #\x)) #\b) "^[^x]*b") +(test-sre (: bos (* numeric) #\b) "^\\d*b") +(test-sre ($ (or (:) #\a)) "(|a)") +(test-sre (: #\a #\b #\c (* #\d)) "abcd*") +(test-sre (: #\a #\b #\c (* #\d)) "abcd*") +(test-sre (: #\a #\b #\c (* numeric)) "abc\\d*") +(test-sre (: #\a #\b #\c (* (or #\d #\e))) "abc[de]*") +(test-sre (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow)) "\\bthe cat\\b") +(test-sre (: (or bow eow) nonl nonl nonl nwb) "\\b...\\B") +(test-sre (: (or bow eow) nonl nonl nonl nwb) "\\b...\\B") +(test-sre (: (or bow eow) nonl nonl nonl nwb) "\\b...\\B") +(test-sre (: #\i #\s #\t) "ist") +(test-sre (: #\i (+ #\s) #\t) "is+t") +(test-sre (: #\i (**? 1 #f #\s) #\t) "is+?t") +(test-sre (: #\i (? #\s) #\t) "is?t") +(test-sre (: #\i (= 2 #\s) #\t) "is{2}t") +(test-sre (: bos #\A (+ space) #\Z) "^A\\s+Z") +(test-sre (: #\A #\s #\k #\Z) "AskZ") +(test-sre (+ (or #\A #\s #\k #\Z)) "[AskZ]+") +(test-sre (+ (~ #\s)) "[^s]+") +(test-sre (+ (~ #\s)) "[^s]+") +(test-sre (+ (~ #\k)) "[^k]+") +(test-sre (+ (~ #\k)) "[^k]+") +(test-sre (+ (~ (or #\s #\k))) "[^sk]+") +(test-sre (+ (~ (or #\s #\k))) "[^sk]+") +(test-sre #\i "i") +(test-sre #\I "I") +(test-sre (or #\z #\i) "[zi]") +(test-sre (or #\i #\I) "[iI]") +(test-sre (+ numeric) "\\d+") +(test-sre (+ numeric) "\\d+") +(test-sre (: #\> (+ space) #\<) ">\\s+<") +(test-sre (: #\> (+ space) #\<) ">\\s+<") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (: (or bow eow) (**? 1 #f nonl) (or bow eow)) "\\b.+?\\b") +(test-sre (: #\c #\a #\f nwb (**? 
1 #f nonl) nwb) "caf\\B.+?\\B") +(test-sre (+ (** 1 3 #\x)) "(?:x{1,3})+") +(test-sre (~ #\a) "[^a]") +(test-sre (: (look-behind (: #\C #\newline)) bos) "(?<=C\n)^") +(test-sre (: (+ (or alnum #\_)) (look-ahead #\tab)) "\\w+(?=\t)") +(test-sre (w/nocase (: #\A #\space #\s #\t #\r #\i #\n #\g)) "(?i)A string") +(test-sre (w/nocase (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* nonl)))) "(?i)([^.]*)\\.([^:]*):[T ]+(.*)") +(test-sre (w/nocase (: bos (+ (char-range #\W #\c)) eos)) "(?i)^[W-c]+$") +(test-sre (w/nocase (~ #\a)) "(?i)[^a]") +(test-sre (: (w/nocase (or (: #\s #\a #\t #\u #\r #\d #\a #\y) (: #\s #\u #\n #\d #\a #\y))) #\:) "(?i:saturday|sunday):") +(test-sre (: (w/nocase #\a) #\b) "(?i:a)b") +(test-sre (w/nocase (: ($ (w/nocase #\a)) #\b)) "(?i)((?i:a))b") +(test-sre (w/nocase (: (w/case #\a) #\b)) "(?i)(?-i:a)b") +(test-sre (w/nocase (: ($ (w/case #\a)) #\b)) "(?i)((?-i:a))b") +(test-sre (w/nocase (: (w/case #\a) #\b)) "(?i)(?-i:a)b") +(test-sre (w/nocase (: (? ($ (: (+ (or (or alnum #\_) #\:))) #\: #\:))) ($ (+ (or alnum #\_))) eos) "(?i)([\\w:]+::)?(\\w+)$") +(test-sre (w/nocase (: #\A #\B (w/case #\C))) "(?i)AB(?-i:C)") +(test-sre (w/nocase (: #\r #\e #\g (or (: #\u #\l (or #\a #\@ (: #\a #\e)) #\r) (: #\e #\x)))) "(?i)reg(?:ul(?:[a@]|ae)r|ex)") +(test-sre (w/nocase (* (~ #\a))) "(?i)[^a]*") +(test-sre (w/nocase (: (*? (~ #\a)) #\X)) "(?i)[^a]*?X") +(test-sre (w/nocase (: (**? 1 #f (~ #\a)) #\X)) "(?i)[^a]+?X") +(test-sre (w/nocase (: (? (~ #\a)) #\X)) "(?i)[^a]?X") +(test-sre (w/nocase (: (?? (~ #\a)) #\X)) "(?i)[^a]??X") +(test-sre (w/nocase (** 2 3 (~ #\a))) "(?i)[^a]{2,3}") +(test-sre (w/nocase (**? 2 3 (~ #\a))) "(?i)[^a]{2,3}?") +(test-sre (w/nocase (: (look-behind (= 2 #\a)) #\b)) "(?i)(?<=a{2})b") +(test-sre (w/nocase (: (neg-look-behind (= 2 #\a)) #\b)) "(?i)(?Abstract

Issues

-

The design of the string-sre-definitions procedure assumes that parameters follow the protocol +

The design of the ssre-definitions procedure assumes that parameters follow the protocol described in SRFI 39, namely that a parameter procedure can be called with a value argument to set the parameter globally. This behavior is not required by R7RS.

@@ -69,7 +69,7 @@

Rationale

for features that are not covered by SRE specification are supported; if a SRE feature can be represented by more than one PCRE construct, only the most frequently used one of each group is supported by SSRE. The examples below show SSRE notation and its equivalent in SRE form (note that -multiple SRE equivalents with the exact same semantics may exist). The string-sre->sre +multiple SRE equivalents with the exact same semantics may exist). The ssre->sre procedure does the transformation.

@@ -160,18 +160,18 @@

Rationale

-The set of named sets/boundary conditions/expressions can be extended via the string-sre-definitions +The set of named sets/boundary conditions/expressions can be extended via the ssre-definitions parameter, e.g.:

-
  (string-sre-definitions
-    (string-sre-bind 'Any 'cset 'any
-    (string-sre-bind 'Nd 'cset char-set:Nd
-    (string-sre-bind 'vowel 'cset '(or #\a #\e #\i #\o #\u #\y #\w)
-    (string-sre-bind 'Vowel 'cset '(or #\A #\E #\I #\O #\U #\Y #\W)
-    (string-sre-bind 'EHi 'cset char-set:Egyptian_Hieroglyphs
-    (string-sre-bind 't 'cset char-set:title-case
-    (string-sre-definitions))))))))
+
  (ssre-definitions
+    (ssre-bind 'Any 'cset 'any
+    (ssre-bind 'Nd 'cset char-set:Nd
+    (ssre-bind 'vowel 'cset '(or #\a #\e #\i #\o #\u #\y #\w)
+    (ssre-bind 'Vowel 'cset '(or #\A #\E #\I #\O #\U #\Y #\W)
+    (ssre-bind 'EHi 'cset char-set:Egyptian_Hieroglyphs
+    (ssre-bind 't 'cset char-set:title-case
+    (ssre-definitions))))))))
 
 
@@ -184,40 +184,44 @@

Specification

-(string-sre->sre ⟨string ⟩) +(ssre->sre ⟨string ⟩)   procedure
This procedure converts ⟨string ⟩ in SSRE syntax to the corresponding SRE. -If the string is not a valid SSRE, an error that satisfies string-sre-syntax-error? is signaled. +It is an error if the argument is not a valid SSRE string.

-(string-sre->regexp ⟨string ⟩) +(ssre->regexp ⟨string ⟩)   procedure
This procedure converts ⟨string ⟩ in SSRE syntax to the corresponding regexp by applying the regexp procedure from SRFI 115 to the -conversion result. If the string is not a valid SSRE, an error that satisfies string-sre-syntax-error? -is signaled. +conversion result. It is an error if the argument is not a valid SSRE string.

-(string-sre-syntax-error? ⟨obj ⟩) +(sre->ssre ⟨sre ⟩)   procedure
-Error type predicate. Returns #t if ⟨obj ⟩ is an object raised -by the string-sre->sre procedure. Otherwise, returns #f. +This procedure converts ⟨sre ⟩ expression in SRFI 115 +SRE format to the SSRE syntax. SREs containing embedded SRFI 14 +character set objects can be converted only if all such objects are bound to names in the definitions parameter +as described below. +It is an error if the argument is not a valid SRE, or contains non-bound character set objects.

-(string-sre-definitions)   procedure
-(string-sre-definitions ⟨bindings ⟩)   procedure
-This procedure acts as a parameter, providing access to the list of defined entities for the string-sre->sre procedure. -The ⟨bindings ⟩ argument is either a result of calling string-sre-definitions with -no arguments, or a result of calling one of the two procedures below. +(ssre-definitions)   procedure
+(ssre-definitions ⟨bindings ⟩)   procedure
+This procedure acts as a parameter, providing access to the list of defined entities for the ssre->sre +and sre->ssre procedures. +The ⟨bindings ⟩ argument is either a result of calling ssre-definitions with +no arguments, or a result of calling one of the two procedures below. The exact form this object takes is +left unspecified, as only procedures in this library need to interpret it.

-(string-sre-bind ⟨name ⟩ ⟨type ⟩ ⟨sre ⟩ ⟨bindings ⟩)   procedure
-(string-sre-unbind ⟨name ⟩ ⟨bindings ⟩)   procedure
The ⟨bindings ⟩ argument is either a result of calling string-sre-definitions with no arguments, or a result of calling one of these two procedures. +(ssre-bind ⟨name ⟩ ⟨type ⟩ ⟨sre ⟩ ⟨bindings ⟩)   procedure
+(ssre-unbind ⟨name ⟩ ⟨bindings ⟩)   procedure
+The ⟨bindings ⟩ argument is either a result of calling ssre-definitions with no arguments, or a result of calling one of these two procedures. The ⟨name ⟩ argument is a symbol. The ⟨type ⟩ argument is one of the following symbols: cset (stands for character set), bcnd (stands for boundary condition), or expr (stands for any SRE expression).
The first procedure creates a new ⟨bindings ⟩ object, giving a new definition for ⟨name ⟩, replacing the old definition if any; the @@ -226,7 +230,7 @@

Specification

-Note that the result of the conversion depends only on the input string and the value of the string-sre-definitions +Note that the result of the conversion depends only on the input string and the value of the ssre-definitions parameter. This allows implementations to cache conversion results if the converter is called with the same SSRE string repeatedly.

@@ -272,7 +276,9 @@

String SRE grammar

⟨empty regexp ⟩ ⟶ ⟨xs ⟩ ⟨quantified regexp ⟩ ⟶ -| ⟨primary regexp ⟩ ⟨quantifier ⟩* +| ⟨primary regexp ⟩ ⟨quantifier ⟩ ? +| ⟨primary regexp ⟩ ⟨quantifier ⟩ +| ⟨primary regexp ⟩ ⟨quantifier ⟩ ⟶ | ⟨xs ⟩ * @@ -334,21 +340,25 @@

String SRE grammar

| \\ | \^ | \- | \[ | \] ⟨set regexp ⟩ ⟶ -| { ⟨xs ⟩ ⟨set alt ⟩ ⟨xs ⟩ } +| { ⟨xs ⟩ ⟨set body ⟩ ⟨xs ⟩ } + +⟨set body ⟩ ⟶ +| ? ⟨regexp options ⟩ : ⟨xs ⟩ ⟨set body ⟩ +| ⟨set alt ⟩ ⟨set alt ⟩ ⟶ -| ⟨set infix op ⟩ ⟨xs ⟩ | ⟨xs ⟩ ⟨set infix op ⟩ -| ⟨set infix op ⟩ +| ⟨set infix ⟩ ⟨xs ⟩ | ⟨xs ⟩ ⟨set alt ⟩ +| ⟨set infix ⟩ -⟨set infix op ⟩ ⟶ -| ⟨set prefix op ⟩ -| ⟨set infix op ⟩ ⟨xs ⟩ & ⟨xs ⟩ ⟨set prefix op ⟩ -| ⟨set infix op ⟩ ⟨xs ⟩ - ⟨xs ⟩ ⟨set prefix op ⟩ +⟨set infix ⟩ ⟶ +| ⟨set prefix ⟩ +| ⟨set infix ⟩ ⟨xs ⟩ & ⟨xs ⟩ ⟨set prefix ⟩ +| ⟨set infix ⟩ ⟨xs ⟩ - ⟨xs ⟩ ⟨set prefix ⟩ -⟨set prefix op ⟩ ⟶ +⟨set prefix ⟩ ⟶ | ⟨set primary ⟩ -| ~ ⟨xs ⟩ ⟨set prefix op ⟩ -| ! ⟨xs ⟩ ⟨set prefix op ⟩ +| ~ ⟨xs ⟩ ⟨set prefix ⟩ +| ! ⟨xs ⟩ ⟨set prefix ⟩ ⟨set primary ⟩ ⟶ | ⟨set name ⟩ @@ -390,7 +400,13 @@

String SRE grammar

⟨capture label ⟩ ⟶ ⟨word ⟩ ⟨class name ⟩ ⟶ ⟨word ⟩ ⟨set name ⟩ ⟶ ⟨name ⟩ +

+

Note that translation of non-greedy repeats with a non-zero lower bound and infinite upper bound +(e.g. x+? or x{2,}?) is implementation-dependent. Systems that support the +corresponding extended SRE forms (e.g. by providing a way to specify the infinite upper bound in the +**? form) are encouraged to implement the translation as well; otherwise these forms +are errors.

Named entities

@@ -410,6 +426,12 @@

Named entities

any cset any _ cset any + nonl cset nonl + + + ascii cset ascii + + digit cset numeric d cset numeric n cset numeric @@ -517,12 +539,20 @@

Character set notation operators

|bcnd×bcnd→bcnd| (or) +

+There is also a special notation for option change, similar to the one used at the beginning of +non-capturing groups. A sequence of the form ? ⟨regexp options ⟩ : +acts like a prefix operator with the lowest possible priority; it can follow an open brace or another option change +notation. Its scope is the rest of the set notation expression up to the matching close brace. +

+

Regexp options

-Options can be specified either as optional symbol arguments to the string-sre->sre procedure, -or in the SSRE string itself via (? ⟨option ⟩… notation at the start of the string -or a non-capturing group. They have their traditional meanings. Some of them affect the way certain SSRE expressions are translated +Options can be specified in the SSRE string itself via ? ⟨regexp options ⟩ notation at the start of the string +(in parenthesized form) or at the start of a non-capturing group or a set notation +(followed by colon as described above). +They have their traditional meanings. Some of them affect the way certain SSRE expressions are translated into SRE, some are purely syntactical (x). The table below lists all of them with short descriptions; in the Translation column, comma separates SRE constructs used when the option is on from their alternatives used when the option is off. Initially all options are off except for u. @@ -545,12 +575,14 @@

Regexp options

Sample implementation

-

Implementation note:

+

Implementation note: the files linked below can be load-ed into many +R6RS/R7RS systems; they can serve as starting points for system-specific implementations, +possibly supporting additional constructs.

-Source for the sample implementation (R6RS/R7RS).
+Source for the sample implementation.
Tests (ASCII only). -

The SRFI-264 repository also contains R7RS libraries and a SRFI-64-based test suite.

+

The SRFI-264 repository also contains a portable R7RS library and a SRFI-64-based test suite.

Acknowledgements

diff --git a/srfi/264.sld b/srfi/264.sld index 5ab0ac7..b56769d 100644 --- a/srfi/264.sld +++ b/srfi/264.sld @@ -2,9 +2,9 @@ ;;; SPDX-License-Identifier: MIT (define-library (srfi 264) - (export string-sre->sre string-sre->regexp string-sre-syntax-error? - string-sre-definitions string-sre-bind string-sre-unbind ) - (import (scheme base) (scheme char) (scheme cxr) (srfi 39) (srfi 115)) + (export ssre->sre ssre->regexp sre->ssre ssre-definitions ssre-bind ssre-unbind) + (import (scheme base) (scheme char) (scheme cxr) (scheme write) + (srfi 14) (srfi 39) (srfi 115)) (begin ;======================================================================================== @@ -15,6 +15,9 @@ ; Helpers +(define orsym (string->symbol "|")) ; r7rs: '|\|| +(define arrsym (string->symbol "->")) ; r6rs/r7rs: '-> + (define-syntax receive (syntax-rules () ((receive formals expression body ...) @@ -31,7 +34,7 @@ (- (char->integer c) (char->integer #\0))) (define (fail s msg . args) - (raise (list 'string-sre->sre s msg args))) + (raise (list 'ssre->sre s msg args))) (define opar #\() (define cpar #\)) @@ -56,7 +59,14 @@ ((eqv? (car p) (car s)) (prefix? (cdr p) (cdr s))) (else #f))) +(define (sre=? s1 s2) + (or (eqv? s1 s2) + (and (char-set? s1) (char-set? s2) (char-set= s1 s2)) ; requires srfi 14 + (and (string? s1) (string? s2) (string=? s1 s2)) + (and (pair? s1) (pair? s2) (sre=? (car s1) (car s2)) (sre=? (cdr s1) (cdr s2))))) + ; Option flags are symbols from the {i, m, s, x, n, u} set +; For convenience, they are combined with definitions in a single env (define o-set? memq) (define (o-add f o) @@ -69,6 +79,10 @@ (cond ((null? o) #f) ((and (pair? (car o)) (eq? n (caar o))) (car o)) (else (o-lookup n (cdr o))))) +(define (o-reverse-lookup sre o) + (cond ((null? o) #f) + ((and (pair? (car o)) (sre=? sre (caddar o))) (car o)) + (else (o-reverse-lookup sre (cdr o))))) (define (o-skip s o) (if (o-set? 'x o) (skip s) s)) (define (o-wrappers o0 o1) ;=> (w/x w/y ...) 
@@ -83,9 +97,19 @@ (loop (o-del 'i o) (cons 'w/nocase wl))) (else wl)))) +; NB: if your implementation of the SRE SRFI supports infinite upper bounds in the +; **/**? forms, return the corresponding SRE value; otherwise, uncomment the 'fail' +; variants. Here we assume that such a bound is denoted by #f, as in IrRegex and +; Alex Shinn's reference implementation +(define (infub) #f) ; e.g. #f, fx-greatest, +inf.0, ... +(define (infub? x) (eqv? x #f)) +; (define (infub) (fail "no support for infinite upper bounds in **/**?")) +; (define (infub? x) #f) + ; SRE consructors (define (e-e) '(:)) +(define (e-null) '(or)) (define (e-bos o) (if (o-set? 'm o) 'bol 'bos)) (define (e-eos o) (if (o-set? 'm o) 'eol 'eos)) (define (e-dot o) (if (o-set? 's o) 'any 'nonl)) @@ -110,19 +134,22 @@ (define (with-e wl e) (if (null? wl) e (list (car wl) (with-e (cdr wl) e)))) (define (or-e e1 e2) - (if (and (pair? e1) (eqv? (car e1) 'or)) - (append e1 (list e2)) - (list 'or e1 e2))) + (cond ((equal? e2 '(or)) e1) + ((and (pair? e1) (eqv? (car e1) 'or)) + (append e1 (list e2))) + (else (list 'or e1 e2)))) (define (and-e e1 e2) - (cond ((and (pair? e1) (eqv? (car e1) 'and)) + (cond ((or (equal? e1 '(or)) (equal? e2 '(or))) '(or)) + ((and (pair? e1) (eqv? (car e1) 'and)) (append e1 (list e2))) ((and (pair? e2) (eqv? (car e2) '~)) (list '- e1 (cadr e2))) (else (list 'and e1 e2)))) (define (diff-e e1 e2) - (if (and (pair? e1) (eqv? (car e1) 'diff)) - (append e1 (list e2)) - (list '- e1 e2))) + (cond ((or (equal? e1 '(or)) (equal? e2 '(or))) e1) + ((and (pair? e1) (eqv? (car e1) 'diff)) + (append e1 (list e2))) + (else (list '- e1 e2)))) (define (range-e e1 e2) (list 'char-range e1 e2)) (define (inv-e e) @@ -150,21 +177,14 @@ (cond ((not n) (list '>= m e)) ((eqv? 
m n) (list '= m e)) (else (list '** m n e)))) -; NB: here we rely on a 'hidden feature' of the SRE specification: the second counter -; of the ** repeat can be #f (standing in for infinity); this extension is supported -; by Alex Shinn's reference implementation for both ** and **?, which makes it unnecessary -; to have nongreedy version of >= and/or duplicate repeated expression as a workaround -; If your SRE implementation does not support it, you may use (: e (*? e)) for +? and -; (: (**? m m e) (*? e)) for +=? if not for the fact that duplicated groups will not -; be counted properly (define (opt-e e) (if (pair? e) (case (car e) ((?) `(?? ,(cadr e))) ((*) `(*? ,(cadr e))) - ((+) `(**? 1 #f ,(cadr e))) ; see note above + ((+) `(**? 1 ,(infub) ,(cadr e))) ; see note above ((=) `(**? ,(cadr e) ,(cadr e) ,(caddr e))) - ((>=) `(**? ,(cadr e) #f ,(caddr e))) ; see note above + ((>=) `(**? ,(cadr e) ,(infub) ,(caddr e))) ; see note above ((**) `(**? ,@(cdr e))) (else `(? ,e))) (list '? e))) @@ -172,7 +192,7 @@ (list e e1)) (define (group-e e) (list '$ e)) (define (ungroup-e e) (if (and (= (length e) 2) (eq? (car e) '$)) (cadr e) e)) -(define (namegroup-e name e) (list '-> name e)) +(define (namegroup-e name e) (list arrsym name e)) (define (backref-e n) (list 'backref n)) (define (lookahead-e e) (list 'look-ahead e)) (define (lookbehind-e e) (list 'look-behind e)) @@ -192,7 +212,7 @@ ; PCRE-like notation parser -(define (parse-re-spec src o) ;=> e, s +(define (parse-ssre-spec src o) ;=> e, s (define (parse-body s o) ;=> e, s (let ((s0 (prefix? "(?" s))) (if (and s0 (pair? s0) (or (char-alphabetic? (car s0)) (eqv? (car s0) #\-))) @@ -394,6 +414,13 @@ (define (check-bcnd t e s op) (unless (eq? t 'bcnd) (fail s (string-append op " applied no non-bcnd argument") e s))) + (define (parse-body s o) ;=> t, e, s + (cond ((prefix? "?" s) + (receive (s o1) (parse-re-options (cdr s) o) + (unless (prefix? 
":" s) (fail s "missing : after option flags")) + (receive (t e s) (parse-body (o-skip (cdr s) o1) o1) + (values t (with-e (o-wrappers o o1) e) s)))) + (else (parse-or s o)))) (define (parse-or s o) ;=> t, e, s (receive (t e s) (parse-in s o) (let loop ((t t) (e e) (s (o-skip s o))) @@ -429,10 +456,13 @@ (check-bcnd t e (cdr s) "!") (values 'bcnd (not-e e) s1))) ((and (pair? s) (eqv? (car s) obrc)) - (receive (t e s) (parse-re-set (cdr s) o) - (when (or (null? s) (not (eqv? (car s) cbrc))) - (fail s "missing }")) - (values t e (cdr s)))) + (let ((s (o-skip (cdr s) o))) + (if (and (pair? s) (eqv? (car s) cbrc)) + (values 'cset (e-null) (cdr s)) ; {} => (or) + (receive (t e s) (parse-body s o) + (when (or (null? s) (not (eqv? (car s) cbrc))) + (fail s "missing }")) + (values t e (cdr s)))))) ((and (pair? s) (eqv? (car s) obrk)) (receive (cs s) (parse-re-class (cdr s) o) (when (or (null? s) (not (eqv? (car s) cbrk))) @@ -442,25 +472,480 @@ (define (parse-prim s0 o) ;=> t, e, s (define (name-char? c) (or (char-alphabetic? c) (eqv? c #\_) (eqv? c #\<) (eqv? c #\>))) - (if (or (eqv? (car s0) #\^) (eqv? (car s0) #\/)) - (let ((name (string->symbol (string (car s0))))) - (receive (t e) (ref-named-expr name o s0) - (values t e (cdr s0)))) - (let loop ((s s0) (l '())) - (cond ((and (pair? s) (name-char? (car s))) - (loop (cdr s) (cons (car s) l))) - ((pair? l) - (let ((name (string->symbol (list->string (reverse l))))) - (receive (t e) (ref-named-expr name o s0) - (values t e s)))) - (else (fail s0 "name expected")))))) - (receive (t e s) (parse-or (o-skip src o) o) - (values t e (o-skip s o)))) + (let loop ((s s0) (l '())) + (cond ((and (pair? s) (name-char? (car s))) + (loop (cdr s) (cons (car s) l))) + ((pair? l) + (let ((name (string->symbol (list->string (reverse l))))) + (receive (t e) (ref-named-expr name o s0) + (values t e s)))) + (else (fail s0 "name expected"))))) + (let ((s (o-skip src o))) + (if (and (pair? s) (eqv? 
(car s) cbrc)) + (values 'cset (e-null) s) ; {} => (or) + (receive (t e s) (parse-body s o) + (values t e (o-skip s o)))))) + + +; PCRE-like notation unparser (tries to use extended syntax only if necessary) + +(define (unfail msg . args) + (apply error 'sre->ssre msg args)) +#| sre->core-sre converts sre to a simplified 'core' grammar that looks like this: + ::= + | (cset ) ; *extension, marks cset-typed subtrees + | (bcnd ) ; *extension, marks bcnd-typed subtrees + | (named "tn") ; symbol names a definition of expr kind, "tn" can be #f + | (shortcut "tn") ; ::= #\X, and top name "tn" can be #f + | (** ) ; to matches; can be #f (meaning infinity) + | (**? ) ; to non-greedy matches; can be #f (meaning infinity) + | (or ...) ; zero or at least 2 alternatives + | (: ...) ; zero or at least 2 concatenees + | ($ ) ; numbered submatch + | (-> ) ; named submatch + | (w/case ) ; single-arg case and unicode toggling + | (w/nocase ) + | (w/ascii ) + | (w/unicode ) + | (look-ahead ) ; zero-width look-ahead assertion + | (look-behind ) ; zero-width look-behind assertion + | (neg-look-ahead ) ; zero-width negative look-ahead assertion + | (neg-look-behind ) ; zero-width negative look-behind assertion + | (backref ) ; match a previous submatch + + ::= + | + | (/ ) ; elementary char range + | (named "tn") ; symbol names a definition of csre kind, "tn" can be #f + | (shortcut "tn") ; ::= #\s | #\d | #\w | #\S | #\D | #\W, and "tn" can be #f + | (or ...) ; union of zero or at least two + | (& ...) ; intersection of zero or at least two + | (- ...) ; difference of at least two + | (~ ) ; complement of exactly one + | (w/case ) ; single-arg case and unicode toggling + | (w/nocase ) + | (w/ascii ) + | (w/unicode ) + + ::= + | (named "tn") ; symbol names a definition of bcnd kind, "tn" can be #f + | (shortcut "tn") ; ::= #\< | #\> | #\b | #\A | #\z, and "tn" can be #f + | (or ...) 
; logical OR of zero or at least two + | (neg-look-ahead ) ; logical NOT of exactly one +|# + +(define (sre->core-sre sre o) ;=> cre + (define (range-spec? rs) (or (char? rs) (string? rs))) + (define (headed-list? x . hl) (and (pair? x) (memq (car x) hl) (list? (cdr x)))) + (define (list1? x) (and (pair? x) (null? (cdr x)))) + (define (list2? x) (and (pair? x) (list1? (cdr x)))) + (define (arg-or-seq l) (if (list1? l) (car l) (cons ': l))) + (define (arg-or-union l) (if (list1? l) (car l) (cons 'or l))) + (define (count? x) (and (number? x) (exact? x) (not (negative? x)))) + (define (alnum? x) (memq x '(alphanumeric alphanum alnum))) + (define (underscore? x) (member x '(#\_ ("_")))) + (define (wordcs? r) + (and (headed-list? r orsym 'or) (list2? (cdr r)) + (or (and (alnum? (cadr r)) (underscore? (caddr r))) (and (underscore? (cadr r)) (alnum? (caddr r)))))) + (define (wordbnd? r) + (and (headed-list? r orsym 'or) (member (cdr r) '((bow eow) (eow bow))))) + ; shortcuts do not depend on the current definitions paramteter + (define (shortcut-expr-sre? r) ;=> char | #f + (cond ((memq r '(grapheme)) #\X) + ((equal? r '(: (? #\newline) eos)) #\Z) ; mostly roundtripping hack + (else #f))) + (define (shortcut-cset-sre? r) ;=> char | #f + (cond ((memq r '(numeric num)) #\d) + ((and (headed-list? r '~ 'complement) (list1? (cdr r)) (memq (cadr r) '(numeric num))) #\D) + ((memq r '(whitespace white space)) #\s) + ((and (headed-list? r '~ 'complement) (list1? (cdr r)) (memq (cadr r) '(whitespace white space))) #\S) + ((wordcs? r) #\w) + ((and (headed-list? r '~ 'complement) (list1? (cdr r)) (wordcs? (cadr r))) #\W) + (else #f))) + (define (shortcut-bcnd-sre? r) ;=> char | #f + (cond ((eq? r 'bos) #\A) ; better use ^ outside of charsets + ((eq? r 'eos) #\z) ; better use $ outside of charsets + ((eq? r 'bow) #\<) + ((eq? r 'eow) #\>) + ((wordbnd? r) #\b) + ((eq? r 'nwb) #\B) + ((and (headed-list? r 'neg-look-ahead) (list1? (cdr r)) (wordbnd? 
(cadr r))) #\B) + (else #f))) + ; try to map r to our 'canonical' name for reverse definition lookup + (define (named-class-sre? r) ;=> sym | #f + (cond ((assq r '((lower-case . lower) (lower . lower) (upper-case . upper) (upper . upper) + (title-case . title) (title . title) (alphabetic . alpha) (alpha . alpha) + (whitespace . space) (white . space) (space . space) (numeric . numeric) (num . numeric) + (alphanumeric . alnum) (alphanum . alnum) (alnum . alnum) (symbol . symbol) + (control . cntrl) (cntrl . cntrl) (printing . print) (print . print) + (graphic . graph) (graph . graph) (punctuation . punct) (punct . punct) + (hex-digit . xdigit) (xdigit . xdigit) (ascii . ascii))) => cdr) + (else #f))) + (define (lookup r o) + (let ((rv (or (and (symbol? r) (named-class-sre? r)) r))) + (o-reverse-lookup rv o))) + (define (lookup-name r o t) + (let ((x (lookup r o))) (and x (eq? (cadr x) t) (car x)))) + (define (flatten-char-ranges l) ;=> (start-char end-char ...) + (let flatten ((l l) (fl '())) + (cond ((null? l) (reverse fl)) + ((string? (car l)) (flatten (append (string->list (car l)) (cdr l)) fl)) + (else (flatten (cdr l) (cons (car l) fl)))))) + (define (lct t1 t2) ; least common type + (cond ((not t1) t2) ((not t2) t1) ((eq? t1 t2) t1) (else 'expr))) + (define (cast cr rt ct) ;=> cr' + (if (eq? rt ct) cr `(,rt ,cr))) + (define (cast-noncasted cr ct) ;=> (rt cr') + (if (headed-list? cr 'bcnd 'cset) cr `(,ct ,cr))) + ; do not sort, just cluster/merge same-type neighbors + (define (or-join r1 r2 ti) + (cond ;these tests shorten the output, but lead to quadratic behavior + ;((and (eq? ti 'cset) (equal? r1 r2) r1)) ; safe: no groups inside + ;((and (headed-list? r1 'or) (member r2 (cdr r1))) r1) + (else (or-e r1 r2)))) + (define (finalize-or rl tl) + (let loop ((rl rl) (tl tl) (ct #f) (crl '()) (ctl '())) + (if (null? rl) + (let ((l (reverse (map (lambda (r t) (cast r t ct)) crl ctl)))) + (cond ((null? l) (values '(or) 'cset)) + ((null? 
(cdr l)) (values (car l) ct)) + (else (values `(or . ,l) ct)))) + (let ((ri (car rl)) (ti (car tl))) + (if (and (pair? (cdr tl)) (eq? ti (cadr tl))) + (loop (cons (or-join ri (cadr rl) ti) (cddr rl)) (cons ti (cddr tl)) ct crl ctl) + (loop (cdr rl) (cdr tl) (lct ct ti) (cons ri crl) (cons ti ctl))))))) + (define (convert r o) + (let cvt ((r r)) + (cond + ((eq? r 'bos) (values `(shortcut #\A ,(lookup-name r o 'bcnd) "^") 'bcnd)) + ((eq? r 'eos) (values `(shortcut #\z ,(lookup-name r o 'bcnd) "$") 'bcnd)) + ((eq? r 'nonl) (values `(named ,(lookup-name r o 'cset) ".") 'cset)) + ((shortcut-expr-sre? r) => + (lambda (c) (values `(shortcut ,c ,(lookup-name r o 'expr) #f) 'expr))) + ((shortcut-cset-sre? r) => + (lambda (c) (values `(shortcut ,c ,(lookup-name r o 'cset) #f) 'cset))) + ((shortcut-bcnd-sre? r) => + (lambda (c) (values `(shortcut ,c ,(lookup-name r o 'bcnd) #f) 'bcnd))) + ((lookup r o) => + (lambda (x) (values `(named ,(car x) #f) (cadr x)))) + ((string? r) + (cvt `(: . ,(string->list r)))) + ((headed-list? r '* 'zero-or-more) + (cvt `(** 0 #f . ,(cdr r)))) + ((headed-list? r '+ 'one-or-more) + (cvt `(** 1 #f . ,(cdr r)))) + ((headed-list? r '? 'optional) + (cvt `(** 0 1 . ,(cdr r)))) + ((and (headed-list? r '= 'exactly) (>= (length r) 2) (count? (cadr r))) + (cvt `(** ,(cadr r) ,(cadr r) . ,(cddr r)))) + ((and (headed-list? r '>= 'at-least) (>= (length r) 2) (count? (cadr r))) + (cvt `(** ,(cadr r) #f . ,(cddr r)))) + ((and (headed-list? r '** 'repeated) + (>= (length r) 3) (count? (cadr r)) (or (count? (caddr r)) (infub? (caddr r)))) + (if (and (eqv? (cadr r) 1) (eqv? (caddr r) 1)) + (cvt (arg-or-seq (cdddr r))) + (receive (cr ct) (cvt (arg-or-seq (cdddr r))) + (define ub (if (infub? (caddr r)) #f (caddr r))) + (values `(** ,(cadr r) ,ub ,(cast cr ct 'expr)) 'expr)))) + ((headed-list? r orsym 'or) + (let loop ((l (cdr r)) (rl '()) (tl '())) + (if (null? l) (finalize-or (reverse rl) (reverse tl)) + (receive (cr ct) (cvt (car l)) + (if (headed-list? 
cr 'or) ; splice in nested ors using ct for noncasted + (let* ((srl (reverse (cdr cr))) ; mix of casted and noncasted (ct) + (crl (map (lambda (r) (cast-noncasted r ct)) srl))) ; all casted + ; separate cast types and cres for the rest of the algorithm + (loop (cdr l) (append (map cadr crl) rl) (append (map car crl) tl))) + (loop (cdr l) (cons cr rl) (cons ct tl))))))) + ((headed-list? r ': 'seq) + (let loop ((l (cdr r)) (rl '())) + (if (null? l) + (let ((l (reverse rl))) + (cond ((null? l) (values '(:) 'expr)) ; epsilon + ((null? (cdr l)) (values (car l) 'expr)) + (else (values `(: . ,l) 'expr)))) + (receive (cr ct) (cvt (car l)) + (if (headed-list? cr ':) ; splice in nested :s + (loop (cdr l) (append (reverse (cdr cr)) rl)) + (loop (cdr l) (cons (cast cr ct 'expr) rl))))))) + ((headed-list? r '$ 'submatch) + (receive (cr ct) (cvt (arg-or-seq (cdr r))) + (if (o-set? 'n o) (values cr ct) (values `($ ,(cast cr ct 'expr)) 'expr)))) + ((and (headed-list? r arrsym 'submatch-named) (>= (length r) 2) (symbol? (cadr r))) + (receive (cr ct) (cvt (arg-or-seq (cddr r))) + (if (o-set? 'n o) (values cr ct) (values `(,arrsym ,(cadr r) ,(cast cr ct 'expr)) 'expr)))) + ((headed-list? r 'w/case 'w/nocase 'w/unicode 'w/ascii) + ; since we don't allow multiargument w/xxx in context, we just + ; wrap multiple args in a seq, leaving a single arg as-is; type errors won't + ; allow the sequenced ones to be used in the context + (receive (cr ct) (cvt (arg-or-seq (cdr r))) + (values `(,(car r) ,cr) ct))) ; do not cast cr to 'expr: has to work in all contexts + ((headed-list? r 'w/nocapture) + (convert (arg-or-seq (cdr r)) (o-add 'n o))) ; use 'n flag to kill BOTH numbered and named captures! + ((memq r '(bos eos bol eol bog eog bow eow nwb)) + (values r 'bcnd)) + ((memq r '(grapheme word)) + (values r 'expr)) + ((headed-list? r 'word) + (cvt `(: bow ,@(cdr r) eow))) + ((headed-list? r 'word+) + (if (equal? 
(cdr r) '(any)) + (values 'word 'expr) + (cvt `(word (+ (and (or alnum #\_) (or ,@(cdr r)))))))) + ((headed-list? r '*? 'non-greedy-zero-or-more) + (cvt `(**? 0 #f . ,(cdr r)))) + ((headed-list? r '?? 'non-greedy-optional) + (cvt `(**? 0 1 . ,(cdr r)))) + ((and (headed-list? r '**? 'non-greedy-repeated) + (>= (length r) 3) (count? (cadr r)) (or (count? (caddr r)) (infub? (caddr r)))) + (if (and (eqv? (cadr r) 1) (eqv? (caddr r) 1)) + (cvt (arg-or-seq (cdddr r))) + (receive (cr ct) (cvt (arg-or-seq (cdddr r))) + (define ub (if (infub? (caddr r)) #f (caddr r))) + (values `(**? ,(cadr r) ,ub ,(cast cr ct 'expr)) 'expr)))) + ((headed-list? r 'look-ahead 'look-behind 'neg-look-ahead 'neg-look-behind) + (receive (cr ct) (cvt (arg-or-seq (cdr r))) + ; in truth, all lookarounds are boundary conditions, but we only want to keep a small + ; subset of them as conditions for the purpose of rendering them via the {..} notation + (define rct (if (and (eq? (car r) 'neg-look-ahead) (eq? ct 'bcnd)) 'bcnd 'expr)) + (values `(,(car r) ,(cast cr ct rct)) rct))) + ((and (headed-list? r 'backref) (= (length r) 2) (or (symbol? (cadr r)) (count? (cadr r)))) + (values r 'expr)) + ; csets, fall through + ((char? r) + (values r 'cset)) + ((and (list1? r) (string? (car r))) + (cvt (cons 'or (string->list (car r))))) + ((and (headed-list? r 'char-set) (list1? (cdr r)) (string? (cadr r))) + (cvt (cons 'or (string->list (cadr r))))) + ((and (headed-list? r '/ 'char-range) (andmap range-spec? (cdr r))) + (let loop ((cr* (flatten-char-ranges (cdr r))) (rl '())) + (cond ((and (null? cr*) (list1? rl)) (values (car rl) 'cset)) + ((null? cr*) (values `(or . ,(reverse rl)) 'cset)) + ((null? (cdr cr*)) (unfail "odd char count in char range SRE" r)) + ((eqv? (car cr*) (cadr cr*)) (loop (cddr cr*) (cons (car cr*) rl))) + ((char<=? (car cr*) (cadr cr*)) (loop (cddr cr*) (cons `(/ ,(car cr*) ,(cadr cr*)) rl))) + (else (unfail "invalid char range in SRE" r (car cr*) (cadr cr*)))))) + ((headed-list? 
r '& 'and) + (cond ((null? (cdr r)) (values '(&) 'cset)) ; neutral element for and, same as 'any' + ((null? (cddr r)) (cvt (cadr r))) ; idty: do not upgrade type? + (else (let loop ((l (cdr r)) (rl '())) + (if (null? l) (values `(& . ,(reverse rl)) 'cset) + (receive (cr ct) (cvt (car l)) + (unless (eq? ct 'cset) (unfail "non-cset argument inside (& ...)" (car l))) + (if (headed-list? cr '&) ; splice in nested ands + (loop (cdr l) (append (reverse (cdr cr)) rl)) + (loop (cdr l) (cons cr rl))))))))) + ((and (headed-list? r '- 'difference) (pair? (cdr r))) + (cond ((null? (cddr r)) (cvt (cadr r))) ; idty: do not upgrade type? + (else (receive (cr0 ct0) (cvt (cadr r)) + (unless (eq? ct0 'cset) (unfail "non-cset argument inside (- ...)" (cadr r))) + (let loop ((l (cddr r)) (rl (list cr0))) + (if (null? l) (values `(- . ,(reverse rl)) 'cset) + (receive (cr ct) (cvt (car l)) + (unless (eq? ct 'cset) (unfail "non-cset argument inside (& ...)" (car l))) + (if (headed-list? cr 'or) ; splice in nested ors + (loop (cdr l) (append (reverse (cdr cr)) rl)) + (loop (cdr l) (cons cr rl)))))))))) + ((headed-list? r '~ 'complement) + (receive (cr ct) (cvt (arg-or-union (cdr r))) + (unless (eq? ct 'cset) (unfail "non-cset argument inside (~ ...)" r)) + (values `(~ ,cr) 'cset))) + (else (unfail "invalid or unsupported SRE" r))))) + ; start the conversion + (receive (cr ct) (convert sre o) + (cast cr ct 'expr))) + +; internal +(define (sre->csre sre) ;=> csre + (define ds (ssre-definitions)) + (sre->core-sre sre (cons 'u (ds-nes ds)))) + +; render csre grammar to a text port p +(define (unparse-csre-spec csre p) + (define (emit . xl) (for-each (lambda (x) (display x p)) xl)) + (define (emit-shortcut x) (if (char? x) (emit #\\ x) (emit x))) + (define (headed-list? x . hl) (and (pair? x) (memq (car x) hl) (list? 
(cdr x)))) + (define (options-prefix x) + (case x ((w/case) "-i") ((w/nocase) "i") ((w/unicode) "u") ((w/ascii) "-u"))) + (define (lookaround-prefix x) + (case x ((look-ahead) "=") ((neg-look-ahead) "!") ((look-behind) "<=") ((neg-look-behind) " (length (cdr r)) 1) (andmap cset-class-elt? (cdr r))) + (and (headed-list? r '~) (headed-list? (cadr r) 'or) + (pair? (cdadr r)) (andmap cset-class-elt? (cdadr r))))) + ; entry point + (define (unparse-top r) + ; recognize popular nondefault option prefixes, to save on : + (cond ((headed-list? r 'w/nocase 'w/case 'w/ascii 'w/unicode) + (emit "(?" (options-prefix (car r)) ")") (unparse-top (cadr r))) + ((and (headed-list? r 'cset) ; pull w/xxx out of cset wrapper + (headed-list? (cadr r) 'w/nocase 'w/case 'w/ascii 'w/unicode)) + (unparse-top `(,(car (cadr r)) (cset ,(cadr (cadr r)))))) + (else (unparse-body r)))) + (define (unparse-body r) + (unparse-alt r)) + (define (unparse-alt r) + (cond ((equal? r '(or)) ; special case + (emit "{}")) + ((headed-list? r 'or) + (let loop ((l (cdr r))) + (unless (null? l) + (unparse-alt (car l)) + (unless (null? (cdr l)) (emit #\|)) + (loop (cdr l))))) + (else (unparse-seq r)))) + (define (unparse-seq r) + (cond ((headed-list? r ':) + (let loop ((l (cdr r))) + (unless (null? l) + (unparse-seq (car l)) + (loop (cdr l))))) + (else (unparse-quant r)))) + (define (unparse-quant r) + (cond ((headed-list? r '** '**?) + ; make sure nested repeats, if they happen, are rendred as separate + ; so that no unexpected non-greedy combos are produced by the parser + (cond ((not (headed-list? (cadddr r) '** '**?)) (unparse-quant (cadddr r))) + (else (emit "(?:") (unparse-quant (cadddr r)) (emit ")"))) + (cond ((and (eqv? (cadr r) 0) (eqv? (caddr r) 1)) (emit #\?)) + ((and (eqv? (cadr r) 0) (not (caddr r))) (emit #\*)) + ((and (eqv? (cadr r) 1) (not (caddr r))) (emit #\+)) + ((eqv? (cadr r) (caddr r)) (emit #\{ (cadr r) #\})) + (else (emit #\{ (cadr r) #\, (or (caddr r) "") #\}))) + (when (eq? 
(car r) '**?) (emit #\?))) + (else (unparse-prim r)))) + (define (unparse-prim r) + (cond ((headed-list? r 'cset) (unparse-cset (cadr r))) + ((headed-list? r 'bcnd) (unparse-bcnd (cadr r))) + ((headed-list? r 'shortcut) + (cond ((cadddr r) => emit) ; use "top name" if any + (else (emit-shortcut (cadr r))))) + ((headed-list? r 'named) + (cond ((caddr r) => emit) ; use "top name" if any + (else (emit "\\p{" (cadr r) "}")))) + ((headed-list? r '$) + (emit "(") (unparse-body (cadr r)) (emit ")")) + ((headed-list? r arrsym) + (emit "(?<" (cadr r) ">") (unparse-body (caddr r)) (emit ")")) + ((headed-list? r 'w/case 'w/nocase 'w/unicode 'w/ascii) + (emit "(?" (options-prefix (car r)) ":") (unparse-body (cadr r)) (emit ")")) + ((headed-list? r 'look-ahead 'neg-look-ahead 'look-behind 'neg-look-behind) + (emit "(?" (lookaround-prefix (car r))) (unparse-body (cadr r)) (emit ")")) + ((headed-list? r 'backref) + (cond ((symbol? (cadr r)) (emit "\\k<" (cadr r) ">")) + ((< (cadr r) 100) (emit "\\" (cadr r))) + (else (unfail "numerical backref out of range" (cadr r))))) + ((headed-list? r ': 'or) + (emit "(?:") (unparse-body r) (emit ")")) + (else (unfail "unsupported SRE" r)))) + (define (unparse-cset r) + (cond ((char? r) + (case r ((#\\ #\^ #\$ #\. #\| #\* #\+ #\? #\[ #\] #\( #\) #\{ #\}) (emit #\\))) + (emit r)) + ((and (headed-list? r 'named) (caddr r)) => emit) ; use "top name" if any + ((headed-list? r 'shortcut) + (cond ((cadddr r) => emit) ; use "top name" if any + (else (emit-shortcut (cadr r))))) + ((headed-list? r 'w/case 'w/nocase 'w/ascii 'w/unicode) + (emit "(?" (options-prefix (car r)) #\:) (unparse-cset (cadr r)) (emit ")")) + ((cset-class? r) (unparse-cset-class r)) + (else (emit "{") (unparse-cset-body r) (emit "}")))) + (define (unparse-cset-body r) + (cond (else (unparse-cset-alt r)))) + (define (unparse-cset-alt r) + (cond ((headed-list? r 'or) + (let loop ((l (cdr r))) + (unless (null? l) + (unparse-cset-alt (car l)) + (unless (null? 
(cdr l)) (emit #\|)) + (loop (cdr l))))) + (else (unparse-cset-infix r)))) + (define (unparse-cset-infix r) + (cond ((headed-list? r '- '&) + (let loop ((l (cdr r))) + (cond ((null? (cdr l)) (unparse-cset-prefix (car l))) + (else (unparse-cset-prefix (car l)) + (if (headed-list? r '&) (emit #\&) (emit #\-)) + (loop (cdr l)))))) + (else (unparse-cset-prefix r)))) + (define (unparse-cset-prefix r) + (cond ((headed-list? r '~) + (emit "~") (unparse-cset-prefix (cadr r))) + (else (unparse-cset-prim r)))) + (define (unparse-cset-prim r) + (cond ((headed-list? r 'named) (emit (cadr r))) + ((headed-list? r 'shortcut) + (cond ((caddr r) => emit) ; use name if any + (else (emit-shortcut (cadr r))))) + ((cset-class? r) (unparse-cset-class r)) + ((headed-list? r 'w/case 'w/nocase 'w/ascii 'w/unicode) + (emit "{?" (options-prefix (car r)) #\:) (unparse-cset-body (cadr r)) (emit "}")) + ((headed-list? r 'or) + (emit "{") (unparse-cset-body r) (emit "}")) + (else (unfail "invalid SRE char set" r)))) + (define (unparse-cset-class r) + (cond ((headed-list? r '~) (emit "[^") (unparse-class-body (cadr r)) (emit "]")) + (else (emit "[") (unparse-class-body r) (emit "]")))) + (define (unparse-class-body r) + (cond ((headed-list? r 'or) (for-each unparse-class-elt (cdr r))) + (else (unparse-class-elt r)))) + (define (unparse-class-elt r) + (cond ((char? r) + (case r ((#\\ #\^ #\- #\[ #\]) (emit #\\))) (emit r)) + ((headed-list? r '/) + (let ((r (cadr r))) (case r ((#\\ #\^ #\- #\[ #\]) (emit #\\))) (emit r)) + (emit #\-) + (let ((r (caddr r))) (case r ((#\\ #\^ #\- #\[ #\]) (emit #\\))) (emit r))) + ((headed-list? r 'named) (emit "[:" (cadr r) ":]")) + ((headed-list? r 'shortcut) (emit-shortcut (cadr r))) + (else (unfail "invalid SRE class element" r)))) + (define (unparse-bcnd r) + (cond ((headed-list? 
r 'shortcut) + (cond ((cadddr r) => emit) ; use "top name" if any + (else (emit-shortcut (cadr r))))) + (else (emit "{") (unparse-bcnd-body r) (emit "}")))) + (define (unparse-bcnd-body r) + (unparse-bcnd-alt r)) + (define (unparse-bcnd-alt r) + (cond ((headed-list? r 'or) + (let loop ((l (cdr r))) + (unless (null? l) + (unparse-bcnd-alt (car l)) + (unless (null? (cdr l)) (emit #\|)) + (loop (cdr l))))) + (else (unparse-bcnd-prefix r)))) + (define (unparse-bcnd-prefix r) + (cond ((and (headed-list? r 'neg-look-ahead)) + (emit "!") (unparse-bcnd-prefix (cadr r))) + (else (unparse-bcnd-prim r)))) + (define (unparse-bcnd-prim r) + (cond ((char? r) (emit r)) + ((headed-list? r 'named) (emit (cadr r))) + ((headed-list? r 'shortcut) + (cond ((caddr r) => emit) ; use name if any + (else (emit-shortcut (cadr r))))) + ((headed-list? r 'or) + (emit "{") (unparse-bcnd-body r) (emit "}")) + (else (unfail "invalid SRE boundary condition" r)))) + ; start here + (unparse-top csre)) + +; internal +(define (csre->ssre cs) + (let ((p (open-output-string))) + (unparse-csre-spec cs p) + (get-output-string p))) + +; (unparse-re-spec sre p o) (define named-exprs '( - (/ cset #\\) - (^ cset #\^) + (ascii cset ascii) + (nonl cset nonl) (any cset any) (_ cset any) (digit cset numeric) (n cset numeric) (d cset numeric) (lower cset lower) (l cset lower) @@ -473,10 +958,10 @@ (graph cset graph) (g cset graph) (symbol cset symbol) (y cset symbol) (print cset print) (gs cset print) - (blank cset (or #\space #\tab)) (h cset (or #\space #\tab)) + (blank cset (or #\space #\tab)) (h cset (or #\space #\tab)) ; ascii version (space cset space) (s cset space) (w cset (or alnum #\_)) - (v cset (- space (or #\space #\tab))) + (v cset (- space (or #\space #\tab))) ; ascii version (bos bcnd bos) ( bcnd eos) (bol bcnd bol) (string name)))))) ; definitions are wrapped into a ds structure with 2 extra slots to contain cached data; -; cache #1 is for string-sre->sre, cache #2 for string-sre->regexp +; cache 
#1 is for ssre->sre, cache #2 for ssre->regexp (define (make-ds nes) (vector nes '() '())) (define (ds-nes ds) (vector-ref ds 0)) @@ -519,48 +1004,56 @@ ; a parameter procedure can be called with a value argument to set the parameter globally. ; This behavior is not required by R7RS. -(define string-sre-definitions - (make-parameter (make-ds named-exprs))) +(define ssre-definitions + ; named-exprs is reversed here to make sure o-reverse-lookup picks shorter names + (make-parameter (make-ds (reverse named-exprs)))) -(define (string-sre-bind n t e ds) - (make-ds (cons (list n t e) (ds-nes ds)))) +(define (ssre-bind n t e ds) + (make-ds (cons (list n t e) (ds-nes (ssre-unbind n ds))))) -(define (string-sre-unbind n ds) +(define (ssre-unbind n ds) (define (unbind n nes) (cond ((null? nes) nes) ((and (pair? nes) (pair? (car nes)) (eq? (caar nes) n)) (unbind n (cdr nes))) - (else (cons (car nes) (string-sre-unbind n (cdr nes)))))) + (else (cons (car nes) (unbind n (cdr nes)))))) (make-ds (unbind n (ds-nes ds)))) (define (ssre-fancy-error str src msg args) (define p (- (string-length str) (length src))) - (define m (string-append "string-sre->sre: " msg)) + (define m (string-append "ssre->sre: " msg)) (when (>= p 0) ; todo: what if str is multi-line? pick p line only! (set! m (string-append m "\n" str "\n" (make-string p #\space) "^"))) (apply error m args)) -(define (string-sre-syntax-error? x) - (and (list? x) (= (length x) 4) (eq? (car x) 'string-sre->sre) +(define (ssre-syntax-error? x) + (and (list? x) (= (length x) 4) (eq? (car x) 'ssre->sre) (string? (cadr x)) (string? (caddr x)) (list? (cadddr x)))) -(define (string-sre->sre str) - (define ds (string-sre-definitions)) - (define cs (cache-slot ds 1 str)) ; cache #1 is for string-sre->sre +(define (ssre->sre str) + (define ds (ssre-definitions)) + (define cs (cache-slot ds 1 str)) ; cache #1 is for ssre->sre (or (cdr cs) - (guard (x ((string-sre-syntax-error? 
x) (apply ssre-fancy-error str (cdr x)))) - (receive (e s) (parse-re-spec (string->list str) (cons 'u (ds-nes ds))) + (guard (x ((ssre-syntax-error? x) (apply ssre-fancy-error str (cdr x)))) + (receive (e s) (parse-ssre-spec (string->list str) (cons 'u (ds-nes ds))) (when (pair? s) (fail s (string-append "unexpected terminator char: " (string (car s))))) (set-cdr! cs e) e)))) -(define (string-sre->regexp str) - (define ds (string-sre-definitions)) - (define cs (cache-slot ds 2 str)) ; cache #2 is for string-sre->regexp +(define (ssre->regexp str) + (define ds (ssre-definitions)) + (define cs (cache-slot ds 2 str)) ; cache #2 is for ssre->regexp (or (cdr cs) - (guard (x ((string-sre-syntax-error? x) (apply ssre-fancy-error str (cdr x)))) - (receive (e s) (parse-re-spec (string->list str) (cons 'u (ds-nes ds))) + (guard (x ((ssre-syntax-error? x) (apply ssre-fancy-error str (cdr x)))) + (receive (e s) (parse-ssre-spec (string->list str) (cons 'u (ds-nes ds))) (when (pair? s) (fail s (string-append "unexpected terminator char: " (string (car s))))) (let ((re (regexp e))) (set-cdr! cs re) re))))) -)) + +(define (sre->ssre sre) + (define ds (ssre-definitions)) + (let ((p (open-output-string))) + (unparse-csre-spec (sre->core-sre sre (cons 'u (ds-nes ds))) p) + (get-output-string p))) + +)) \ No newline at end of file diff --git a/ssre-tests.scm b/ssre-tests.scm index d2a8dae..43f501f 100644 --- a/ssre-tests.scm +++ b/ssre-tests.scm @@ -1,13 +1,6 @@ ;;; SPDX-FileCopyrightText: 2025 Sergei Egorov ;;; SPDX-License-Identifier: MIT -(define (ssre->sre/opts s . o*) - (if (pair? o*) - (let* ((os (apply string-append (map symbol->string o*))) - (s (string-append "(?" 
os ")" s))) - (string-sre->sre s)) - (string-sre->sre s))) - (define *tests-run* 0) (define *tests-passed* 0) @@ -65,1453 +58,2795 @@ (define-syntax test-ssre (syntax-rules () - ((test-ssre pat o* res) - (test-equal 'res (apply ssre->sre/opts 'pat 'o*))))) + ((test-ssre pat res) + (test-equal 'res (ssre->sre 'pat))))) + +(define-syntax test-sre + (syntax-rules () + ((test-sre sre ssre) + (test-equal 'ssre (sre->ssre 'sre))))) + +; save default definitions +(define *ssre-definitions* (ssre-definitions)) + +; add some random definitions for the ssre tests +(ssre-definitions + (ssre-bind 'Any 'cset 'any + (ssre-bind 'Nd 'cset 'numeric + (ssre-bind 'vowel 'cset '(or #\a #\e #\i #\o #\u #\y #\w) + (ssre-bind 'Vowel 'cset '(or #\A #\E #\I #\O #\U #\Y #\W) + (ssre-bind 'L 'cset 'alpha + (ssre-bind 'Ll 'cset 'lower + (ssre-bind 'Lu 'cset 'upper + *ssre-definitions*)))))))) + +; NOTE: translations on the right are not the only correct ones; there can be equivalent translations, which are also correct + +(test-ssre "the quick brown fox" (: #\t #\h #\e #\space #\q #\u #\i #\c #\k #\space #\b #\r #\o #\w #\n #\space #\f #\o #\x)) +(test-ssre "a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz" (: (* #\a) #\a #\b (? #\c) #\x #\y (+ #\z) #\p #\q (= 3 #\r) #\a (>= 2 #\b) #\x (** 4 5 #\y) #\p (** 0 6 #\q) #\A (>= 0 #\B) #\z #\z)) +(test-ssre "^(abc){1,2}zz" (: bos (** 1 2 ($ (: #\a #\b #\c))) #\z #\z)) +(test-ssre "^(b+?|a){1,2}?c" (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) +(test-ssre "^(b+|a){1,2}c" (: bos (** 1 2 ($ (or (+ #\b) #\a))) #\c)) +(test-ssre "^(ba|b*){1,2}?bc" (: bos (**? 
1 2 ($ (or (: #\b #\a) (* #\b)))) #\b #\c)) +(test-ssre "^[ab\\]cde]" (: bos (or #\a #\b #\] #\c #\d #\e))) +(test-ssre "^[]cde]" (: bos (or #\] #\c #\d #\e))) +(test-ssre "^[^ab\\]cde]" (: bos (~ (or #\a #\b #\] #\c #\d #\e)))) +(test-ssre "^[^]cde]" (: bos (~ (or #\] #\c #\d #\e)))) +(test-ssre "^@" (: bos #\@)) +(test-ssre "^[0-9]+$" (: bos (+ (char-range #\0 #\9)) eos)) +(test-ssre "^.*nter" (: bos (* nonl) #\n #\t #\e #\r)) +(test-ssre "^xxx[0-9]+$" (: bos #\x #\x #\x (+ (char-range #\0 #\9)) eos)) +(test-ssre "^.+[0-9][0-9][0-9]$" (: bos (+ nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) +(test-ssre "^.+?[0-9][0-9][0-9]$" (: bos (**? 1 #f nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) +(test-ssre "^([^!]+)!(.+)=apquxz\\.ixr\\.zzz\\.ac\\.uk$" (: bos ($ (+ (~ #\!))) #\! ($ (+ nonl)) #\= #\a #\p #\q #\u #\x #\z #\. #\i #\x #\r #\. #\z #\z #\z #\. #\a #\c #\. #\u #\k eos)) +(test-ssre ":" #\:) +(test-ssre "([\\da-f:]+)$" (: ($ (+ (or numeric (char-range #\a #\f) #\:))) eos)) +(test-ssre "^.*\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$" (: bos (* nonl) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) eos)) +(test-ssre "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$" (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos)) +(test-ssre "^[a-zA-Z\\d][a-zA-Z\\d\\-]*(\\.[a-zA-Z\\d][a-zA-Z\\d\\-]*)*\\.$" (: bos (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-)) (* ($ (: #\. (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-))))) #\. eos)) +(test-ssre "^\\*\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?(\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?)*$" (: bos #\* #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric))))) (* ($ (: #\. 
(char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric)))))))) eos)) +(test-ssre "^(?=ab(de))(abd)(e)" (: bos (look-ahead (: #\a #\b ($ (: #\d #\e)))) ($ (: #\a #\b #\d)) ($ #\e))) +(test-ssre "^(?!(ab)de|x)(abd)(f)" (: bos (neg-look-ahead (or (: ($ (: #\a #\b)) #\d #\e) #\x)) ($ (: #\a #\b #\d)) ($ #\f))) +(test-ssre "^(?=(ab(cd)))(ab)" (: bos (look-ahead ($ (: #\a #\b ($ (: #\c #\d))))) ($ (: #\a #\b)))) +(test-ssre "^[\\da-f](\\.[\\da-f])*$" (: bos (or numeric (char-range #\a #\f)) (* ($ (: #\. (or numeric (char-range #\a #\f))))) eos)) +(test-ssre "^\".*\"\\s*(;.*)?$" (: bos #\" (* nonl) #\" (* space) (? ($ (: #\; (* nonl)))) eos)) +(test-ssre "^$" (: bos eos)) +(test-ssre "(?x)^ a\\ b[c ]d $" (: bos #\a #\space #\b (or #\c #\space) #\d eos)) +(test-ssre "^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$" (: bos ($ (: #\a ($ (: #\b ($ #\c))))) ($ (: #\d ($ (: #\e ($ #\f))))) ($ (: #\h ($ (: #\i ($ #\j))))) ($ (: #\k ($ (: #\l ($ #\m))))) eos)) +(test-ssre "^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$" (: bos (: #\a ($ (: #\b ($ #\c)))) (: #\d ($ (: #\e ($ #\f)))) (: #\h ($ (: #\i ($ #\j)))) (: #\k ($ (: #\l ($ #\m)))) eos)) +(test-ssre "^[\\w][\\W][\\s][\\S][\\d][\\D]\\]" (: bos (or alnum #\_) (~ (or alnum #\_)) space (~ space) numeric (~ numeric) #\])) +(test-ssre "^[.^$|()*+?{,}]+" (: bos (+ (or #\. #\^ #\$ #\| #\( #\) #\* #\+ #\? #\{ #\, #\})))) +(test-ssre "^a*\\w" (: bos (* #\a) (or alnum #\_))) +(test-ssre "^a*?\\w" (: bos (*? #\a) (or alnum #\_))) +(test-ssre "^a+\\w" (: bos (+ #\a) (or alnum #\_))) +(test-ssre "^a+?\\w" (: bos (**? 1 #f #\a) (or alnum #\_))) +(test-ssre "^\\d{8}\\w{2,}" (: bos (= 8 numeric) (>= 2 (or alnum #\_)))) +(test-ssre "^[aeiou\\d]{4,5}$" (: bos (** 4 5 (or #\a #\e #\i #\o #\u numeric)) eos)) +(test-ssre "^[aeiou\\d]{4,5}?" (: bos (**? 
4 5 (or #\a #\e #\i #\o #\u numeric)))) +(test-ssre "^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]" (: bos #\F #\r #\o #\m (+ #\space) ($ (+ (~ #\space))) (+ #\space) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (+ #\space) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (+ #\space) (? (char-range #\0 #\9)) (char-range #\0 #\9) (+ #\space) (char-range #\0 #\9) (char-range #\0 #\9) #\: (char-range #\0 #\9) (char-range #\0 #\9))) +(test-ssre "^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" (: bos #\F #\r #\o #\m (+ space) (+ (~ space)) (+ space) (= 2 ($ (: (= 3 (or (char-range #\a #\z) (char-range #\A #\Z))) (+ space)))) (** 1 2 numeric) (+ space) numeric numeric #\: numeric numeric)) +(test-ssre "^12.34" (: bos #\1 #\2 nonl #\3 #\4)) +(test-ssre "foo(?!bar)(.*)" (: #\f #\o #\o (neg-look-ahead (: #\b #\a #\r)) ($ (* nonl)))) +(test-ssre "(?:(?!foo)...|^.{0,2})bar(.*)" (: (or (: (neg-look-ahead (: #\f #\o #\o)) nonl nonl nonl) (: bos (** 0 2 nonl))) #\b #\a #\r ($ (* nonl)))) +(test-ssre "^(\\D*)(?=\\d)(?!123)" (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) +(test-ssre "(?!^)abc" (: (neg-look-ahead bos) #\a #\b #\c)) +(test-ssre "(?=^)abc" (: (look-ahead bos) #\a #\b #\c)) +(test-ssre "^[ab]{1,3}(ab*|b)" (: bos (** 1 3 (or #\a #\b)) ($ (or (: #\a (* #\b)) #\b)))) +(test-ssre "^[ab]{1,3}?(ab*|b)" (: bos (**? 1 3 (or #\a #\b)) ($ (or (: #\a (* #\b)) #\b)))) +(test-ssre "^[ab]{1,3}?(ab*?|b)" (: bos (**? 1 3 (or #\a #\b)) ($ (or (: #\a (*? #\b)) #\b)))) +(test-ssre "^[ab]{1,3}(ab*?|b)" (: bos (** 1 3 (or #\a #\b)) ($ (or (: #\a (*? #\b)) #\b)))) +(test-ssre "ab{1,3}bc" (: #\a (** 1 3 #\b) #\b #\c)) +(test-ssre "([^.]*)\\.([^:]*):[T ]+(.*)" (: ($ (* (~ #\.))) #\. 
($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* nonl)))) +(test-ssre "^[W-c]+$" (: bos (+ (char-range #\W #\c)) eos)) +(test-ssre "^[?-_]+$" (: bos (+ (char-range #\? #\_)) eos)) +(test-ssre "^abc$" (: bos #\a #\b #\c eos)) +(test-ssre "\\Aabc\\z" (: bos #\a #\b #\c eos)) +(test-ssre "\\A(.)*\\z" (: bos (* ($ nonl)) eos)) +(test-ssre "(?:b)|(?::+)" (or #\b (+ #\:))) +(test-ssre "[-az]+" (+ (or #\- #\a #\z))) +(test-ssre "[az-]+" (+ (or #\a #\z #\-))) +(test-ssre "[a\\-z]+" (+ (or #\a #\- #\z))) +(test-ssre "[a-z]+" (+ (char-range #\a #\z))) +(test-ssre "[\\d-]+" (+ (or numeric #\-))) +(test-ssre "\\\\" #\\) +(test-ssre "a{0}bc" (: (= 0 #\a) #\b #\c)) +(test-ssre "(a|(bc)){0,0}?xyz" (: (**? 0 0 ($ (or #\a ($ (: #\b #\c))))) #\x #\y #\z)) +(test-ssre "^([^a])([^b])([^c]*)([^d]{3,4})" (: bos ($ (~ #\a)) ($ (~ #\b)) ($ (* (~ #\c))) ($ (** 3 4 (~ #\d))))) +(test-ssre "[^a]" (~ #\a)) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "[^k]$" (: (~ #\k) eos)) +(test-ssre "[^k]{2,3}$" (: (** 2 3 (~ #\k)) eos)) +(test-ssre "^\\d{8,}@.+[^k]$" (: bos (>= 8 numeric) #\@ (+ nonl) (~ #\k) eos)) +(test-ssre "[^a]" (~ #\a)) +(test-ssre "[^az]" (~ (or #\a #\z))) +(test-ssre "P[^*]TAIRE[^*]{1,6}?LL" (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 6 (~ #\*)) #\L #\L)) +(test-ssre "P[^*]TAIRE[^*]{1,}?LL" (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 #f (~ #\*)) #\L #\L)) +(test-ssre "(\\.\\d\\d[1-9]?)\\d+" (: ($ (: #\. numeric numeric (? (char-range #\1 #\9)))) (+ numeric))) +(test-ssre "(\\.\\d\\d((?=0)|\\d(?=\\d)))" ($ (: #\. numeric numeric ($ (or (look-ahead #\0) (: numeric (look-ahead numeric))))))) +(test-ssre "\\b(foo)\\s+(\\w+)" (: (or bow eow) ($ (: #\f #\o #\o)) (+ space) ($ (+ (or alnum #\_))))) +(test-ssre "foo(.*)bar" (: #\f #\o #\o ($ (* nonl)) #\b #\a #\r)) +(test-ssre "foo(.*?)bar" (: #\f #\o #\o ($ (*? 
nonl)) #\b #\a #\r)) +(test-ssre "(.*)(\\d*)" (: ($ (* nonl)) ($ (* numeric)))) +(test-ssre "(.*)(\\d+)" (: ($ (* nonl)) ($ (+ numeric)))) +(test-ssre "(.*?)(\\d*)" (: ($ (*? nonl)) ($ (* numeric)))) +(test-ssre "(.*?)(\\d+)" (: ($ (*? nonl)) ($ (+ numeric)))) +(test-ssre "(.*)(\\d+)$" (: ($ (* nonl)) ($ (+ numeric)) eos)) +(test-ssre "(.*?)(\\d+)$" (: ($ (*? nonl)) ($ (+ numeric)) eos)) +(test-ssre "(.*)\\b(\\d+)$" (: ($ (* nonl)) (or bow eow) ($ (+ numeric)) eos)) +(test-ssre "(.*\\D)(\\d+)$" (: ($ (: (* nonl) (~ numeric))) ($ (+ numeric)) eos)) +(test-ssre "^\\D*(?!123)" (: bos (* (~ numeric)) (neg-look-ahead (: #\1 #\2 #\3)))) +(test-ssre "^(\\D*)(?=\\d)(?!123)" (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) +(test-ssre "^[W-]46\\]" (: bos (or #\W #\-) #\4 #\6 #\])) +(test-ssre "^[W-\\]46]" (: bos (or (char-range #\W #\]) #\4 #\6))) +(test-ssre "word (?:[a-zA-Z0-9]+ ){0,10}otherword" (: #\w #\o #\r #\d #\space (** 0 10 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) +(test-ssre "word (?:[a-zA-Z0-9]+ ){0,300}otherword" (: #\w #\o #\r #\d #\space (** 0 300 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) +(test-ssre "^(a){0,0}" (: bos (= 0 ($ #\a)))) +(test-ssre "^(a){0,1}" (: bos (** 0 1 ($ #\a)))) +(test-ssre "^(a){0,2}" (: bos (** 0 2 ($ #\a)))) +(test-ssre "^(a){0,3}" (: bos (** 0 3 ($ #\a)))) +(test-ssre "^(a){0,}" (: bos (>= 0 ($ #\a)))) +(test-ssre "^(a){1,1}" (: bos (= 1 ($ #\a)))) +(test-ssre "^(a){1,2}" (: bos (** 1 2 ($ #\a)))) +(test-ssre "^(a){1,3}" (: bos (** 1 3 ($ #\a)))) +(test-ssre "^(a){1,}" (: bos (>= 1 ($ #\a)))) +(test-ssre ".*\\.gif" (: (* nonl) #\. #\g #\i #\f)) +(test-ssre ".{0,}\\.gif" (: (>= 0 nonl) #\. #\g #\i #\f)) +(test-ssre ".*\\.gif" (: (* nonl) #\. #\g #\i #\f)) +(test-ssre ".*\\.gif" (: (* nonl) #\. 
#\g #\i #\f)) +(test-ssre ".*\\.gif" (: (* nonl) #\. #\g #\i #\f)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre "(.*X|^B)" ($ (or (: (* nonl) #\X) (: bos #\B)))) +(test-ssre "^.*B" (: bos (* nonl) #\B)) +(test-ssre "(?m)^.*B" (: bol (* nonl) #\B)) +(test-ssre "^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" (: bos (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9))) +(test-ssre "^\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d" (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) +(test-ssre "^[\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d]" (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) +(test-ssre "^[abc]{12}" (: bos (= 12 (or #\a #\b #\c)))) +(test-ssre "^[a-c]{12}" (: bos (= 12 (char-range #\a #\c)))) +(test-ssre "^(a|b|c){12}" (: bos (= 12 ($ (or #\a #\b #\c))))) +(test-ssre "^[abcdefghijklmnopqrstuvwxy0123456789]" (: bos (or #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))) +(test-ssre "abcde{0,0}" (: #\a #\b #\c #\d (= 0 #\e))) +(test-ssre "ab[cd]{0,0}e" (: #\a #\b (= 0 (or #\c #\d)) #\e)) +(test-ssre "ab(c){0,0}d" (: #\a #\b (= 0 ($ #\c)) #\d)) +(test-ssre "a(b*)" (: #\a ($ (* #\b)))) +(test-ssre "ab\\d{0}e" (: #\a #\b (= 0 numeric) #\e)) +(test-ssre "\"([^\\\\\"]+|\\\\.)*\"" (: #\" (* ($ (or (+ (~ (or #\\ #\"))) (: #\\ nonl)))) #\")) +(test-ssre ".*?" (*? 
nonl)) +(test-ssre "\\b" (or bow eow)) +(test-ssre "\\b" (or bow eow)) +(test-ssre "a[^a]b" (: #\a (~ #\a) #\b)) +(test-ssre "a.b" (: #\a nonl #\b)) +(test-ssre "a[^a]b" (: #\a (~ #\a) #\b)) +(test-ssre "a.b" (: #\a nonl #\b)) +(test-ssre "^(b+?|a){1,2}?c" (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) +(test-ssre "^(b+|a){1,2}?c" (: bos (**? 1 2 ($ (or (+ #\b) #\a))) #\c)) +(test-ssre "(?!\\A)x" (: (neg-look-ahead bos) #\x)) +(test-ssre "(A|B)*?CD" (: (*? ($ (or #\A #\B))) #\C #\D)) +(test-ssre "(A|B)*CD" (: (* ($ (or #\A #\B))) #\C #\D)) +(test-ssre "(?= 0 #\b) #\b #\c)) +(test-ssre "ab+bc" (: #\a (+ #\b) #\b #\c)) +(test-ssre "ab{1,}bc" (: #\a (>= 1 #\b) #\b #\c)) +(test-ssre "ab+bc" (: #\a (+ #\b) #\b #\c)) +(test-ssre "ab{1,}bc" (: #\a (>= 1 #\b) #\b #\c)) +(test-ssre "ab{1,3}bc" (: #\a (** 1 3 #\b) #\b #\c)) +(test-ssre "ab{3,4}bc" (: #\a (** 3 4 #\b) #\b #\c)) +(test-ssre "ab{4,5}bc" (: #\a (** 4 5 #\b) #\b #\c)) +(test-ssre "ab?bc" (: #\a (? #\b) #\b #\c)) +(test-ssre "ab{0,1}bc" (: #\a (** 0 1 #\b) #\b #\c)) +(test-ssre "ab?bc" (: #\a (? #\b) #\b #\c)) +(test-ssre "ab?c" (: #\a (? 
#\b) #\c)) +(test-ssre "ab{0,1}c" (: #\a (** 0 1 #\b) #\c)) +(test-ssre "^abc$" (: bos #\a #\b #\c eos)) +(test-ssre "^abc" (: bos #\a #\b #\c)) +(test-ssre "^abc$" (: bos #\a #\b #\c eos)) +(test-ssre "abc$" (: #\a #\b #\c eos)) +(test-ssre "^" bos) +(test-ssre "$" eos) +(test-ssre "a.c" (: #\a nonl #\c)) +(test-ssre "a.*c" (: #\a (* nonl) #\c)) +(test-ssre "a[bc]d" (: #\a (or #\b #\c) #\d)) +(test-ssre "a[b-d]e" (: #\a (char-range #\b #\d) #\e)) +(test-ssre "a[b-d]" (: #\a (char-range #\b #\d))) +(test-ssre "a[-b]" (: #\a (or #\- #\b))) +(test-ssre "a[b-]" (: #\a (or #\b #\-))) +(test-ssre "a\\]" (: #\a #\])) +(test-ssre "a[]]b" (: #\a #\] #\b)) +(test-ssre "a[^bc]d" (: #\a (~ (or #\b #\c)) #\d)) +(test-ssre "a[^-b]c" (: #\a (~ (or #\- #\b)) #\c)) +(test-ssre "a[^]b]c" (: #\a (~ (or #\] #\b)) #\c)) +(test-ssre "\\ba\\b" (: (or bow eow) #\a (or bow eow))) +(test-ssre "\\by\\b" (: (or bow eow) #\y (or bow eow))) +(test-ssre "\\Ba\\B" (: nwb #\a nwb)) +(test-ssre "\\By\\b" (: nwb #\y (or bow eow))) +(test-ssre "\\by\\B" (: (or bow eow) #\y nwb)) +(test-ssre "\\By\\B" (: nwb #\y nwb)) +(test-ssre "\\w" (or alnum #\_)) +(test-ssre "\\W" (~ (or alnum #\_))) +(test-ssre "a\\sb" (: #\a space #\b)) +(test-ssre "a\\Sb" (: #\a (~ space) #\b)) +(test-ssre "\\d" numeric) +(test-ssre "\\D" (~ numeric)) +(test-ssre "ab|cd" (or (: #\a #\b) (: #\c #\d))) +(test-ssre "()ef" (: ($ (:)) #\e #\f)) +(test-ssre "$b" (: eos #\b)) +(test-ssre "a\\(b" (: #\a #\( #\b)) +(test-ssre "a\\(*b" (: #\a (* #\() #\b)) +(test-ssre "a\\\\b" (: #\a #\\ #\b)) +(test-ssre "((a))" ($ ($ #\a))) +(test-ssre "(a)b(c)" (: ($ #\a) #\b ($ #\c))) +(test-ssre "a+b+c" (: (+ #\a) (+ #\b) #\c)) +(test-ssre "a{1,}b{1,}c" (: (>= 1 #\a) (>= 1 #\b) #\c)) +(test-ssre "a.+?c" (: #\a (**? 
1 #f nonl) #\c)) +(test-ssre "(a+|b)*" (* ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b){0,}" (>= 0 ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b)+" (+ ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b){1,}" (>= 1 ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b)?" (? ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b){0,1}" (** 0 1 ($ (or (+ #\a) #\b)))) +(test-ssre "[^ab]*" (* (~ (or #\a #\b)))) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "a*" (* #\a)) +(test-ssre "([abc])*d" (: (* ($ (or #\a #\b #\c))) #\d)) +(test-ssre "([abc])*bcd" (: (* ($ (or #\a #\b #\c))) #\b #\c #\d)) +(test-ssre "a|b|c|d|e" (or #\a #\b #\c #\d #\e)) +(test-ssre "(a|b|c|d|e)f" (: ($ (or #\a #\b #\c #\d #\e)) #\f)) +(test-ssre "abcd*efg" (: #\a #\b #\c (* #\d) #\e #\f #\g)) +(test-ssre "ab*" (: #\a (* #\b))) +(test-ssre "(ab|cd)e" (: ($ (or (: #\a #\b) (: #\c #\d))) #\e)) +(test-ssre "[abhgefdc]ij" (: (or #\a #\b #\h #\g #\e #\f #\d #\c) #\i #\j)) +(test-ssre "^(ab|cd)e" (: bos ($ (or (: #\a #\b) (: #\c #\d))) #\e)) +(test-ssre "(abc|)ef" (: ($ (or (: #\a #\b #\c) (:))) #\e #\f)) +(test-ssre "(a|b)c*d" (: ($ (or #\a #\b)) (* #\c) #\d)) +(test-ssre "(ab|ab*)bc" (: ($ (or (: #\a #\b) (: #\a (* #\b)))) #\b #\c)) +(test-ssre "a([bc]*)c*" (: #\a ($ (* (or #\b #\c))) (* #\c))) +(test-ssre "a([bc]*)(c*d)" (: #\a ($ (* (or #\b #\c))) ($ (: (* #\c) #\d)))) +(test-ssre "a([bc]+)(c*d)" (: #\a ($ (+ (or #\b #\c))) ($ (: (* #\c) #\d)))) +(test-ssre "a([bc]*)(c+d)" (: #\a ($ (* (or #\b #\c))) ($ (: (+ #\c) #\d)))) +(test-ssre "a[bcd]*dcdcde" (: #\a (* (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) +(test-ssre "a[bcd]+dcdcde" (: #\a (+ (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) +(test-ssre "(ab|a)b*c" (: ($ (or (: #\a #\b) #\a)) (* #\b) #\c)) +(test-ssre "((a)(b)c)(d)" (: ($ (: ($ #\a) ($ #\b) #\c)) ($ #\d))) +(test-ssre "[a-zA-Z_][a-zA-Z0-9_]*" (: (or (char-range #\a #\z) (char-range #\A #\Z) #\_) (* (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9) #\_)))) +(test-ssre "^a(bc+|b[eh])g|.h$" (or (: bos #\a ($ (or (: 
#\b (+ #\c)) (: #\b (or #\e #\h)))) #\g) (: nonl #\h eos))) +(test-ssre "(bc+d$|ef*g.|h?i(j|k))" ($ (or (: #\b (+ #\c) #\d eos) (: #\e (* #\f) #\g nonl) (: (? #\h) #\i ($ (or #\j #\k)))))) +(test-ssre "((((((((((a))))))))))" ($ ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a))))))))))) +(test-ssre "(((((((((a)))))))))" ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a)))))))))) +(test-ssre "multiple words of text" (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s #\space #\o #\f #\space #\t #\e #\x #\t)) +(test-ssre "multiple words" (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s)) +(test-ssre "(.*)c(.*)" (: ($ (* nonl)) #\c ($ (* nonl)))) +(test-ssre "\\((.*), (.*)\\)" (: #\( ($ (* nonl)) #\, #\space ($ (* nonl)) #\))) +(test-ssre "[k]" #\k) +(test-ssre "abcd" (: #\a #\b #\c #\d)) +(test-ssre "a(bc)d" (: #\a ($ (: #\b #\c)) #\d)) +(test-ssre "a[-]?c" (: #\a (? #\-) #\c)) +(test-ssre "a(?!b)." (: #\a (neg-look-ahead #\b) nonl)) +(test-ssre "a(?=d)." (: #\a (look-ahead #\d) nonl)) +(test-ssre "a(?=c|d)." (: #\a (look-ahead (or #\c #\d)) nonl)) +(test-ssre "a(?:b|c|d)(.)" (: #\a (or #\b #\c #\d) ($ nonl))) +(test-ssre "a(?:b|c|d)*(.)" (: #\a (* (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d)+?(.)" (: #\a (**? 1 #f (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d)+(.)" (: #\a (+ (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){2}(.)" (: #\a (= 2 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){4,5}(.)" (: #\a (** 4 5 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){4,5}?(.)" (: #\a (**? 4 5 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){6,7}(.)" (: #\a (** 6 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){6,7}?(.)" (: #\a (**? 6 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,6}(.)" (: #\a (** 5 6 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,6}?(.)" (: #\a (**? 5 6 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,7}(.)" (: #\a (** 5 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,7}?(.)" (: #\a (**? 
5 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|(c|e){1,2}?|d)+?(.)" (: #\a (**? 1 #f (or #\b (**? 1 2 ($ (or #\c #\e))) #\d)) ($ nonl))) +(test-ssre "^(.+)?B" (: bos (? ($ (+ nonl))) #\B)) +(test-ssre "^([^a-z])|(\\^)$" (or (: bos ($ (~ (char-range #\a #\z)))) (: ($ #\^) eos))) +(test-ssre "^[<>]&" (: bos (or #\< #\>) #\&)) +(test-ssre "(?<=a)b" (: (look-behind #\a) #\b)) +(test-ssre "(?a+)ab" (: ($ (: #\> (+ #\a))) #\a #\b)) +(test-ssre "b\\z" (: #\b eos)) +(test-ssre "(?<=\\d{3}(?!999))foo" (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)))) #\f #\o #\o)) +(test-ssre "(?<=(?!...999)\\d{3})foo" (: (look-behind (: (neg-look-ahead (: nonl nonl nonl #\9 #\9 #\9)) (= 3 numeric))) #\f #\o #\o)) +(test-ssre "(?<=\\d{3}(?!999)...)foo" (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)) nonl nonl nonl)) #\f #\o #\o)) +(test-ssre "(?<=\\d{3}...)(?= 0 #\b) (or alnum #\_))) +(test-ssre "a*\\d*\\w" (: (* #\a) (* numeric) (or alnum #\_))) +(test-ssre "(?x)a*b *\\w" (: (* #\a) (* #\b) (or alnum #\_))) +(test-ssre "(?x)a* b *\\w" (: (* #\a) (* #\b) (or alnum #\_))) +(test-ssre "\\z(?= 2 ($ (** 2 3 #\a)))) #\a)) +(test-ssre "(?=C)" (look-ahead #\C)) +(test-ssre "(?:a(? (?')|(?\")) |b(? (?')|(?\")) ) (\\k[a-z]+|[0-9]+)" (: (or (: #\a (-> quote (or (: #\space (-> apostrophe #\')) (-> realquote #\"))) #\space) (: #\b (-> quote (or (: #\space (-> apostrophe #\')) (-> realquote #\"))) #\space)) #\space ($ (or (: (backref quote) (+ (char-range #\a #\z))) (+ (char-range #\0 #\9)))))) +(test-ssre "^(a){2,}+(\\w)" (: bos (+ (>= 2 ($ #\a))) ($ (or alnum #\_)))) +(test-ssre "^(?:a){2,}+(\\w)" (: bos (+ (>= 2 #\a)) ($ (or alnum #\_)))) +(test-ssre "\\A.*?(a|bc)" (: bos (*? nonl) ($ (or #\a (: #\b #\c))))) +(test-ssre "\\A.*?(?:a|bc|d)" (: bos (*? nonl) (or #\a (: #\b #\c) #\d))) +(test-ssre "(?:.*?a)(?<=ba)" (: (*? 
nonl) #\a (look-behind (: #\b #\a)))) +(test-ssre "a(?=bc).|abd" (or (: #\a (look-ahead (: #\b #\c)) nonl) (: #\a #\b #\d))) +(test-ssre "\\A.*?(?:a|bc)" (: bos (*? nonl) (or #\a (: #\b #\c)))) +(test-ssre "^\\d*\\w{4}" (: bos (* numeric) (= 4 (or alnum #\_)))) +(test-ssre "^[^b]*\\w{4}" (: bos (* (~ #\b)) (= 4 (or alnum #\_)))) +(test-ssre "^a*\\w{4}" (: bos (* #\a) (= 4 (or alnum #\_)))) +(test-ssre "(?:(?foo)|(?bar))\\k" (: (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n))) +(test-ssre "(?A)(?:(?foo)|(?bar))\\k" (: (-> n #\A) (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n))) +(test-ssre "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$" (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos)) +(test-ssre "(?:x|(?:(xx|yy)+|x|x|x|x|x)|a|a|a)bc" (: (or #\x (or (+ ($ (or (: #\x #\x) (: #\y #\y)))) #\x #\x #\x #\x #\x) #\a #\a #\a) #\b #\c)) +(test-ssre "\\sabc" (: space #\a #\b #\c)) +(test-ssre "Z*(|d*){216}" (: (* #\Z) (= 216 ($ (or (:) (* #\d)))))) +(test-ssre "(?<=a(B){0}c)X" (: (look-behind (: #\a (= 0 ($ #\B)) #\c)) #\X)) +(test-ssre "a+(?:|b)a" (: (+ #\a) (or (:) #\b) #\a)) +(test-ssre "X?(R||){3335}" (: (? #\X) (= 3335 ($ (or #\R (:) (:)))))) +(test-ssre "(?!(b))c|b" (or (: (neg-look-ahead ($ #\b)) #\c) #\b)) +(test-ssre "(?=(b))b|c" (or (: (look-ahead ($ #\b)) #\b) #\c)) +(test-ssre "<(?x:[a b])>" (: #\< (or #\a #\space #\b) #\>)) +(test-ssre "<(?:[a b])>" (: #\< (or #\a #\space #\b) #\>)) +(test-ssre "<(?xxx:[a b])>" (: #\< (or #\a #\space #\b) #\>)) +(test-ssre "<(?-x:[a b])>" (: #\< (or #\a #\space #\b) #\>)) +(test-ssre "[[:digit:]-]+" (+ (or numeric #\-))) +(test-ssre "(?<=(?=.)?)" (look-behind (? (look-ahead nonl)))) +(test-ssre "(?<=(?=.){4,5})" (look-behind (** 4 5 (look-ahead nonl)))) +(test-ssre "(?<=(?=.){4,5}x)" (look-behind (: (** 4 5 (look-ahead nonl)) #\x))) +(test-ssre " (? \\w+ )* \\. 
" (: #\space #\space #\space (* (-> word (: #\space (+ (or alnum #\_)) #\space))) #\space #\space #\space #\space #\. #\space #\space #\space)) +(test-ssre "(?<=(?=.(?<=x)))" (look-behind (look-ahead (: nonl (look-behind #\x))))) +(test-ssre "(?<=(?=(?<=a)))b" (: (look-behind (look-ahead (look-behind #\a))) #\b)) +(test-ssre "(?<=ab?c)..." (: (look-behind (: #\a (? #\b) #\c)) nonl nonl nonl)) +(test-ssre "(?<=PQR|ab?c)..." (: (look-behind (or (: #\P #\Q #\R) (: #\a (? #\b) #\c))) nonl nonl nonl)) +(test-ssre "(?<=ab?c|PQR)..." (: (look-behind (or (: #\a (? #\b) #\c) (: #\P #\Q #\R))) nonl nonl nonl)) +(test-ssre "(?<=PQ|ab?c)..." (: (look-behind (or (: #\P #\Q) (: #\a (? #\b) #\c))) nonl nonl nonl)) +(test-ssre "(?<=ab?c|PQ)..." (: (look-behind (or (: #\a (? #\b) #\c) (: #\P #\Q))) nonl nonl nonl)) +(test-ssre "(?<=a(b?c|d?e?e)f)X." (: (look-behind (: #\a ($ (or (: (? #\b) #\c) (: (? #\d) (? #\e) #\e))) #\f)) #\X nonl)) +(test-ssre "(?= 5 (char-range #\a #\z)) #\b) #\x)) +(test-ssre "[a-z]{1,6}?s|x" (or (: (**? 1 6 (char-range #\a #\z)) #\s) #\x)) +(test-ssre "[@]" #\@) +(test-ssre "@" #\@) +(test-ssre "@@@xxx" (: #\@ #\@ #\@ #\x #\x #\x)) +(test-ssre "badutf" (: #\b #\a #\d #\u #\t #\f)) +(test-ssre "badutf" (: #\b #\a #\d #\u #\t #\f)) +(test-ssre "shortutf" (: #\s #\h #\o #\r #\t #\u #\t #\f)) +(test-ssre "anything" (: #\a #\n #\y #\t #\h #\i #\n #\g)) +(test-ssre "badutf" (: #\b #\a #\d #\u #\t #\f)) +(test-ssre "(?<=x)badutf" (: (look-behind #\x) #\b #\a #\d #\u #\t #\f)) +(test-ssre "(?<=xx)badutf" (: (look-behind (: #\x #\x)) #\b #\a #\d #\u #\t #\f)) +(test-ssre "(?<=xxxx)badutf" (: (look-behind (: #\x #\x #\x #\x)) #\b #\a #\d #\u #\t #\f)) +(test-ssre "X" #\X) +(test-ssre "a+" (+ #\a)) +(test-ssre "A" #\A) +(test-ssre "x" #\x) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "X" #\X) +(test-ssre "(?<=.)X" (: (look-behind nonl) #\X)) +(test-ssre "a+" (+ #\a)) +(test-ssre "a" #\a) +(test-ssre "." 
nonl) +(test-ssre "s" #\s) +(test-ssre "[^s]" (~ #\s)) +(test-ssre "a(?:.)*?a" (: #\a (*? nonl) #\a)) +(test-ssre "(?<=pqr)abc(?=xyz)" (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z)))) +(test-ssre "a\\b" (: #\a (or bow eow))) +(test-ssre "abc(?=abcde)(?=ab)" (: #\a #\b #\c (look-ahead (: #\a #\b #\c #\d #\e)) (look-ahead (: #\a #\b)))) +(test-ssre "(?<=abc)123" (: (look-behind (: #\a #\b #\c)) #\1 #\2 #\3)) +(test-ssre "\\babc\\b" (: (or bow eow) #\a #\b #\c (or bow eow))) +(test-ssre "(?<=abc)def" (: (look-behind (: #\a #\b #\c)) #\d #\e #\f)) +(test-ssre "abc(?<=bc)def" (: #\a #\b #\c (look-behind (: #\b #\c)) #\d #\e #\f)) +(test-ssre "(?<=ab)cdef" (: (look-behind (: #\a #\b)) #\c #\d #\e #\f)) +(test-ssre "b(?tom|bon)-\\k" (: (-> A (or (: #\t #\o #\m) (: #\b #\o #\n))) #\- (backref A))) +(test-ssre "Xa{2,4}b" (: #\X (** 2 4 #\a) #\b)) +(test-ssre "Xa{2,4}?b" (: #\X (**? 2 4 #\a) #\b)) +(test-ssre "Xa{2,4}+b" (: #\X (+ (** 2 4 #\a)) #\b)) +(test-ssre "X\\d{2,4}b" (: #\X (** 2 4 numeric) #\b)) +(test-ssre "X\\d{2,4}?b" (: #\X (**? 2 4 numeric) #\b)) +(test-ssre "X\\d{2,4}+b" (: #\X (+ (** 2 4 numeric)) #\b)) +(test-ssre "X\\D{2,4}b" (: #\X (** 2 4 (~ numeric)) #\b)) +(test-ssre "X\\D{2,4}?b" (: #\X (**? 2 4 (~ numeric)) #\b)) +(test-ssre "X\\D{2,4}+b" (: #\X (+ (** 2 4 (~ numeric))) #\b)) +(test-ssre "X[abc]{2,4}b" (: #\X (** 2 4 (or #\a #\b #\c)) #\b)) +(test-ssre "X[abc]{2,4}?b" (: #\X (**? 2 4 (or #\a #\b #\c)) #\b)) +(test-ssre "X[abc]{2,4}+b" (: #\X (+ (** 2 4 (or #\a #\b #\c))) #\b)) +(test-ssre "X[^a]{2,4}b" (: #\X (** 2 4 (~ #\a)) #\b)) +(test-ssre "X[^a]{2,4}?b" (: #\X (**? 2 4 (~ #\a)) #\b)) +(test-ssre "X[^a]{2,4}+b" (: #\X (+ (** 2 4 (~ #\a))) #\b)) +(test-ssre "Z(?!)" (: #\Z (neg-look-ahead (:)))) +(test-ssre "dog(sbody)?" (: #\d #\o #\g (? ($ (: #\s #\b #\o #\d #\y))))) +(test-ssre "dog(sbody)??" (: #\d #\o #\g (?? 
($ (: #\s #\b #\o #\d #\y))))) +(test-ssre "dog|dogsbody" (or (: #\d #\o #\g) (: #\d #\o #\g #\s #\b #\o #\d #\y))) +(test-ssre "dogsbody|dog" (or (: #\d #\o #\g #\s #\b #\o #\d #\y) (: #\d #\o #\g))) +(test-ssre "\\bthe cat\\b" (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow))) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "(?<=abc)123" (: (look-behind (: #\a #\b #\c)) #\1 #\2 #\3)) +(test-ssre "\\babc\\b" (: (or bow eow) #\a #\b #\c (or bow eow))) +(test-ssre "a?b?" (: (? #\a) (? #\b))) +(test-ssre "^a?b?" (: bos (? #\a) (? #\b))) +(test-ssre "abcd*" (: #\a #\b #\c (* #\d))) +(test-ssre "abc\\d*" (: #\a #\b #\c (* numeric))) +(test-ssre "abc[de]*" (: #\a #\b #\c (* (or #\d #\e)))) +(test-ssre "(?<=abc)def" (: (look-behind (: #\a #\b #\c)) #\d #\e #\f)) +(test-ssre "abc$" (: #\a #\b #\c eos)) +(test-ssre "abc$" (: #\a #\b #\c eos)) +(test-ssre "abc\\z" (: #\a #\b #\c eos)) +(test-ssre "abc\\b" (: #\a #\b #\c (or bow eow))) +(test-ssre "abc\\B" (: #\a #\b #\c nwb)) +(test-ssre ".+" (+ nonl)) +(test-ssre "(?<=(abc)+)X" (: (look-behind (+ ($ (: #\a #\b #\c)))) #\X)) +(test-ssre "(a)b|ac" (or (: ($ #\a) #\b) (: #\a #\c))) +(test-ssre "(a)(b)x|abc" (or (: ($ #\a) ($ #\b) #\x) (: #\a #\b #\c))) +(test-ssre "(?:(foo)|(bar)|(baz))X" (: (or ($ (: #\f #\o #\o)) ($ (: #\b #\a #\r)) ($ (: #\b #\a #\z))) #\X)) +(test-ssre "(ab)x|ab" (or (: ($ (: #\a #\b)) #\x) (: #\a #\b))) +(test-ssre "(((((a)))))" ($ ($ ($ ($ ($ #\a)))))) +(test-ssre "a*?b*?" (: (*? #\a) (*? #\b))) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "a(b)c" (: #\a ($ #\b) #\c)) +(test-ssre "(a)(b)|(c)" (or (: ($ #\a) ($ #\b)) ($ #\c))) +(test-ssre "(?a)|(?b)" (or (-> A #\a) (-> A #\b))) +(test-ssre "a(b)c(d)" (: #\a ($ #\b) #\c ($ #\d))) +(test-ssre "^abc" (: bos #\a #\b #\c)) +(test-ssre ".*\\d" (: (* nonl) numeric)) +(test-ssre "(abc)*" (* ($ (: #\a #\b #\c)))) +(test-ssre "^" bos) +(test-ssre "(?:ab)?(?:ab)(?:ab)" (: (? 
(: #\a #\b)) (: #\a #\b) (: #\a #\b))) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "(abcd)" ($ (: #\a #\b #\c #\d))) +(test-ssre "abcd" (: #\a #\b #\c #\d)) +(test-ssre "a(b)c" (: #\a ($ #\b) #\c)) +(test-ssre "0b 28 3f 2d 78 29 3a" (: #\0 #\b #\space #\2 #\8 #\space #\3 #\f #\space #\2 #\d #\space #\7 #\8 #\space #\2 #\9 #\space #\3 #\a)) +(test-ssre "a|(b)c" (or #\a (: ($ #\b) #\c))) +(test-ssre "efg" (: #\e #\f #\g)) +(test-ssre "eff" (: #\e #\f #\f)) +(test-ssre "effg" (: #\e #\f #\f #\g)) +(test-ssre "aaa" (: #\a #\a #\a)) +(test-ssre "(?)" (: #\[ ($ (:)) (= 65535 #\]) (-> A (:)))) +(test-ssre "(?<=(?=.(?<=x)))" (look-behind (look-ahead (: nonl (look-behind #\x))))) +(test-ssre "\\z" eos) +(test-ssre "\\Z" (: (? #\newline) eos)) +(test-ssre "(?![ab]).*" (: (neg-look-ahead (or #\a #\b)) (* nonl))) +(test-ssre "abcd" (: #\a #\b #\c #\d)) +(test-ssre "12345(?<=\\d{1,256})X" (: #\1 #\2 #\3 #\4 #\5 (look-behind (** 1 256 numeric)) #\X)) +(test-ssre "(?foo)|(?bar))\\k" (: (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n))) +(test-ssre "a?b[]xy]*c" (: (? #\a) #\b (* (or #\] #\x #\y)) #\c)) +(test-ssre "f*" (* #\f)) +(test-ssre "foo\\*" (: #\f #\o #\o #\*)) +(test-ssre "foo\\*bar" (: #\f #\o #\o #\* #\b #\a #\r)) +(test-ssre "f\\\\oo" (: #\f #\\ #\o #\o)) +(test-ssre "[ten]" (or #\t #\e #\n)) +(test-ssre "t[a-g]n" (: #\t (char-range #\a #\g) #\n)) +(test-ssre "a[]]b" (: #\a #\] #\b)) +(test-ssre "a[]a-]b" (: #\a (or #\] #\a #\-) #\b)) +(test-ssre "a[]-]b" (: #\a (or #\] #\-) #\b)) +(test-ssre "a[]a-z]b" (: #\a (or #\] (char-range #\a #\z)) #\b)) +(test-ssre "\\]" #\]) +(test-ssre "t[!a-g]n" (: #\t (or #\! (char-range #\a #\g)) #\n)) +(test-ssre "A[+-0]B" (: #\A (char-range #\+ #\0) #\B)) +(test-ssre "a[--0]z" (: #\a (char-range #\- #\0) #\z)) +(test-ssre "a[[:digit:].]z" (: #\a (or numeric #\.) 
#\z)) +(test-ssre "A\\B\\\\C\\D" (: #\A nwb #\\ #\C (~ numeric))) +(test-ssre "a*b" (: (* #\a) #\b)) +(test-ssre "<[]bc]>" (: #\< (or #\] #\b #\c) #\>)) +(test-ssre "<[^]bc]>" (: #\< (~ (or #\] #\b #\c)) #\>)) +(test-ssre "a*b+c\\+[def](ab)\\(cd\\)" (: (* #\a) (+ #\b) #\c #\+ (or #\d #\e #\f) ($ (: #\a #\b)) #\( #\c #\d #\))) +(test-ssre "how.to how\\.to" (: #\h #\o #\w nonl #\t #\o #\space #\h #\o #\w #\. #\t #\o)) +(test-ssre "^how to \\^how to" (: bos #\h #\o #\w #\space #\t #\o #\space #\^ #\h #\o #\w #\space #\t #\o)) +(test-ssre "^b\\(c^d\\)\\(^e^f\\)" (: bos #\b #\( #\c bos #\d #\) #\( bos #\e bos #\f #\))) +(test-ssre "\\[()\\]{65535}()" (: #\[ ($ (:)) (= 65535 #\]) ($ (:)))) +(test-ssre "^A" (: bos #\A)) +(test-ssre "^\\w+" (: bos (+ (or alnum #\_)))) +(test-ssre "(.+)\\b(.+)" (: ($ (+ nonl)) (or bow eow) ($ (+ nonl)))) +(test-ssre "\\W+" (+ (~ (or alnum #\_)))) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "a.b" (: #\a nonl #\b)) +(test-ssre "a(.{3})b" (: #\a ($ (= 3 nonl)) #\b)) +(test-ssre "a(.*?)(.)" (: #\a ($ (*? nonl)) ($ nonl))) +(test-ssre "a(.*?)(.)" (: #\a ($ (*? nonl)) ($ nonl))) +(test-ssre "a(.*)(.)" (: #\a ($ (* nonl)) ($ nonl))) +(test-ssre "a(.*)(.)" (: #\a ($ (* nonl)) ($ nonl))) +(test-ssre "a(.)(.)" (: #\a ($ nonl) ($ nonl))) +(test-ssre "a(.)(.)" (: #\a ($ nonl) ($ nonl))) +(test-ssre "a(.?)(.)" (: #\a ($ (? nonl)) ($ nonl))) +(test-ssre "a(.?)(.)" (: #\a ($ (? nonl)) ($ nonl))) +(test-ssre "a(.??)(.)" (: #\a ($ (?? nonl)) ($ nonl))) +(test-ssre "a(.??)(.)" (: #\a ($ (?? nonl)) ($ nonl))) +(test-ssre "a(.{3})b" (: #\a ($ (= 3 nonl)) #\b)) +(test-ssre "a(.{3,})b" (: #\a ($ (>= 3 nonl)) #\b)) +(test-ssre "a(.{3,}?)b" (: #\a ($ (**? 3 #f nonl)) #\b)) +(test-ssre "a(.{3,5})b" (: #\a ($ (** 3 5 nonl)) #\b)) +(test-ssre "a(.{3,5}?)b" (: #\a ($ (**? 
3 5 nonl)) #\b)) +(test-ssre "(?<=aXb)cd" (: (look-behind (: #\a #\X #\b)) #\c #\d)) +(test-ssre "(?<=(.))X" (: (look-behind ($ nonl)) #\X)) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "^[^a]{2}" (: bos (= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}" (: bos (>= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}?" (: bos (**? 2 #f (~ #\a)))) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "^[^a]{2}" (: bos (= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}" (: bos (>= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}?" (: bos (**? 2 #f (~ #\a)))) +(test-ssre "\\D*" (* (~ numeric))) +(test-ssre "\\D*" (* (~ numeric))) +(test-ssre "\\D" (~ numeric)) +(test-ssre ">\\S" (: #\> (~ space))) +(test-ssre "\\d" numeric) +(test-ssre "\\s" space) +(test-ssre "\\D+" (+ (~ numeric))) +(test-ssre "\\D{2,3}" (** 2 3 (~ numeric))) +(test-ssre "\\D{2,3}?" (**? 2 3 (~ numeric))) +(test-ssre "\\d+" (+ numeric)) +(test-ssre "\\d{2,3}" (** 2 3 numeric)) +(test-ssre "\\d{2,3}?" (**? 2 3 numeric)) +(test-ssre "\\S+" (+ (~ space))) +(test-ssre "\\S{2,3}" (** 2 3 (~ space))) +(test-ssre "\\S{2,3}?" (**? 2 3 (~ space))) +(test-ssre ">\\s+<" (: #\> (+ space) #\<)) +(test-ssre ">\\s{2,3}<" (: #\> (** 2 3 space) #\<)) +(test-ssre ">\\s{2,3}?<" (: #\> (**? 2 3 space) #\<)) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\w{2,3}" (** 2 3 (or alnum #\_))) +(test-ssre "\\w{2,3}?" (**? 2 3 (or alnum #\_))) +(test-ssre "\\W+" (+ (~ (or alnum #\_)))) +(test-ssre "\\W{2,3}" (** 2 3 (~ (or alnum #\_)))) +(test-ssre "\\W{2,3}?" (**? 
2 3 (~ (or alnum #\_)))) +(test-ssre "^[ac]*b" (: bos (* (or #\a #\c)) #\b)) +(test-ssre "^[^x]*b" (: bos (* (~ #\x)) #\b)) +(test-ssre "^[^x]*b" (: bos (* (~ #\x)) #\b)) +(test-ssre "^\\d*b" (: bos (* numeric) #\b)) +(test-ssre "(|a)" ($ (or (:) #\a))) +(test-ssre "\\S\\S" (: (~ space) (~ space))) +(test-ssre "\\S{2}" (= 2 (~ space))) +(test-ssre "\\W\\W" (: (~ (or alnum #\_)) (~ (or alnum #\_)))) +(test-ssre "\\W{2}" (= 2 (~ (or alnum #\_)))) +(test-ssre "\\S" (~ space)) +(test-ssre "\\D" (~ numeric)) +(test-ssre "\\W" (~ (or alnum #\_))) +(test-ssre ".[^\\S\n]." (: nonl (~ (or (~ space) #\newline)) nonl)) +(test-ssre "^[^d]*?$" (: bos (*? (~ #\d)) eos)) +(test-ssre "^[^d]*?$" (: bos (*? (~ #\d)) eos)) +(test-ssre "^[^d]*?$" (: bos (*? (~ #\d)) eos)) +(test-ssre "A*" (* #\A)) +(test-ssre "." nonl) +(test-ssre "^\\d*\\w{4}" (: bos (* numeric) (= 4 (or alnum #\_)))) +(test-ssre "^[^b]*\\w{4}" (: bos (* (~ #\b)) (= 4 (or alnum #\_)))) +(test-ssre "^[^b]*\\w{4}" (: bos (* (~ #\b)) (= 4 (or alnum #\_)))) +(test-ssre "^.\\B.\\B." (: bos nonl nwb nonl nwb nonl)) +(test-ssre "\\D+" (+ (~ numeric))) +(test-ssre "^\\w+" (: bos (+ (or alnum #\_)))) +(test-ssre "^\\d+" (: bos (+ numeric))) +(test-ssre "^>\\s+" (: bos #\> (+ space))) +(test-ssre "^A\\s+Z" (: bos #\A (+ space) #\Z)) +(test-ssre "[RST]+" (+ (or #\R #\S #\T))) +(test-ssre "[R-T]+" (+ (char-range #\R #\T))) +(test-ssre "[q-u]+" (+ (char-range #\q #\u))) +(test-ssre "^s?c" (: bos (? #\s) #\c)) +(test-ssre "[A-`]" (char-range #\A #\`)) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\b.+?\\b" (: (or bow eow) (**? 1 #f nonl) (or bow eow))) +(test-ssre "caf\\B.+?\\B" (: #\c #\a #\f nwb (**? 1 #f nonl) nwb)) +(test-ssre "c3 b1" (: #\c #\3 #\space #\b #\1)) +(test-ssre "^A\\s+Z" (: bos #\A (+ space) #\Z)) +(test-ssre "\\W" (~ (or alnum #\_))) +(test-ssre "\\w" (or alnum #\_)) +(test-ssre "Xa{2,4}b" (: #\X (** 2 4 #\a) #\b)) +(test-ssre "Xa{2,4}?b" (: #\X (**? 
2 4 #\a) #\b)) +(test-ssre "Xa{2,4}+b" (: #\X (+ (** 2 4 #\a)) #\b)) +(test-ssre "X\\d{2,4}b" (: #\X (** 2 4 numeric) #\b)) +(test-ssre "X\\d{2,4}?b" (: #\X (**? 2 4 numeric) #\b)) +(test-ssre "X\\d{2,4}+b" (: #\X (+ (** 2 4 numeric)) #\b)) +(test-ssre "X\\D{2,4}b" (: #\X (** 2 4 (~ numeric)) #\b)) +(test-ssre "X\\D{2,4}?b" (: #\X (**? 2 4 (~ numeric)) #\b)) +(test-ssre "X\\D{2,4}+b" (: #\X (+ (** 2 4 (~ numeric))) #\b)) +(test-ssre "X\\D{2,4}b" (: #\X (** 2 4 (~ numeric)) #\b)) +(test-ssre "X\\D{2,4}?b" (: #\X (**? 2 4 (~ numeric)) #\b)) +(test-ssre "X\\D{2,4}+b" (: #\X (+ (** 2 4 (~ numeric))) #\b)) +(test-ssre "X[abc]{2,4}b" (: #\X (** 2 4 (or #\a #\b #\c)) #\b)) +(test-ssre "X[abc]{2,4}?b" (: #\X (**? 2 4 (or #\a #\b #\c)) #\b)) +(test-ssre "X[abc]{2,4}+b" (: #\X (+ (** 2 4 (or #\a #\b #\c))) #\b)) +(test-ssre "X[^a]{2,4}b" (: #\X (** 2 4 (~ #\a)) #\b)) +(test-ssre "X[^a]{2,4}?b" (: #\X (**? 2 4 (~ #\a)) #\b)) +(test-ssre "X[^a]{2,4}+b" (: #\X (+ (** 2 4 (~ #\a))) #\b)) +(test-ssre "X[^a]{2,4}b" (: #\X (** 2 4 (~ #\a)) #\b)) +(test-ssre "X[^a]{2,4}?b" (: #\X (**? 2 4 (~ #\a)) #\b)) +(test-ssre "X[^a]{2,4}+b" (: #\X (+ (** 2 4 (~ #\a))) #\b)) +(test-ssre "\\bthe cat\\b" (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow))) +(test-ssre "abcd*" (: #\a #\b #\c (* #\d))) +(test-ssre "abcd*" (: #\a #\b #\c (* #\d))) +(test-ssre "abc\\d*" (: #\a #\b #\c (* numeric))) +(test-ssre "abc[de]*" (: #\a #\b #\c (* (or #\d #\e)))) +(test-ssre "X\\W{3}X" (: #\X (= 3 (~ (or alnum #\_))) #\X)) +(test-ssre "f.*" (: #\f (* nonl))) +(test-ssre "f.*" (: #\f (* nonl))) +(test-ssre "f.*" (: #\f (* nonl))) +(test-ssre "f.*" (: #\f (* nonl))) +(test-ssre "(?ss)|(?kk)) \\k" (: (or (-> A (: #\s #\s)) (-> A (: #\k #\k))) #\space (backref A))) +(test-ssre "(?:(?s)|(?k)) \\k{3,}!" 
(: (or (-> A #\s) (-> A #\k)) #\space (>= 3 (backref A)) #\!)) +(test-ssre "i" #\i) +(test-ssre "I" #\I) +(test-ssre "[i]" #\i) +(test-ssre "[^i]" (~ #\i)) +(test-ssre "[zi]" (or #\z #\i)) +(test-ssre "[iI]" (or #\i #\I)) +(test-ssre "\\d+" (+ numeric)) +(test-ssre "\\d+" (+ numeric)) +(test-ssre ">\\s+<" (: #\> (+ space) #\<)) +(test-ssre ">\\s+<" (: #\> (+ space) #\<)) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\bABC\\b" (: (or bow eow) #\A #\B #\C (or bow eow))) +(test-ssre "\\bABC\\b" (: (or bow eow) #\A #\B #\C (or bow eow))) +(test-ssre "(?= 2 #\b) #\x (** 4 5 #\y) #\p (** 0 6 #\q) #\A (>= 0 #\B) #\z #\z)) +(test-ssre "^(abc){1,2}zz" (: bos (** 1 2 ($ (: #\a #\b #\c))) #\z #\z)) +(test-ssre "^(b+?|a){1,2}?c" (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) +(test-ssre "^(b+|a){1,2}c" (: bos (** 1 2 ($ (or (+ #\b) #\a))) #\c)) +(test-ssre "^(b+|a){1,2}?bc" (: bos (**? 1 2 ($ (or (+ #\b) #\a))) #\b #\c)) +(test-ssre "^(b*|ba){1,2}?bc" (: bos (**? 1 2 ($ (or (* #\b) (: #\b #\a)))) #\b #\c)) +(test-ssre "^(ba|b*){1,2}?bc" (: bos (**? 1 2 ($ (or (: #\b #\a) (* #\b)))) #\b #\c)) +(test-ssre "^[ab\\]cde]" (: bos (or #\a #\b #\] #\c #\d #\e))) +(test-ssre "^[]cde]" (: bos (or #\] #\c #\d #\e))) +(test-ssre "^[^ab\\]cde]" (: bos (~ (or #\a #\b #\] #\c #\d #\e)))) +(test-ssre "^[^]cde]" (: bos (~ (or #\] #\c #\d #\e)))) +(test-ssre "^@" (: bos #\@)) +(test-ssre "^[0-9]+$" (: bos (+ (char-range #\0 #\9)) eos)) +(test-ssre "^.*nter" (: bos (* nonl) #\n #\t #\e #\r)) +(test-ssre "^xxx[0-9]+$" (: bos #\x #\x #\x (+ (char-range #\0 #\9)) eos)) +(test-ssre "^.+[0-9][0-9][0-9]$" (: bos (+ nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) +(test-ssre "^.+?[0-9][0-9][0-9]$" (: bos (**? 1 #f nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) +(test-ssre "^([^!]+)!(.+)=apquxz\\.ixr\\.zzz\\.ac\\.uk$" (: bos ($ (+ (~ #\!))) #\! ($ (+ nonl)) #\= #\a #\p #\q #\u #\x #\z #\. 
#\i #\x #\r #\. #\z #\z #\z #\. #\a #\c #\. #\u #\k eos)) +(test-ssre ":" #\:) +(test-ssre "^.*\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$" (: bos (* nonl) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) eos)) +(test-ssre "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$" (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos)) +(test-ssre "^[a-zA-Z\\d][a-zA-Z\\d\\-]*(\\.[a-zA-Z\\d][a-zA-Z\\d\\-]*)*\\.$" (: bos (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-)) (* ($ (: #\. (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-))))) #\. eos)) +(test-ssre "^\\*\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?(\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?)*$" (: bos #\* #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric))))) (* ($ (: #\. (char-range #\a #\z) (? 
($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric)))))))) eos)) +(test-ssre "^(?=ab(de))(abd)(e)" (: bos (look-ahead (: #\a #\b ($ (: #\d #\e)))) ($ (: #\a #\b #\d)) ($ #\e))) +(test-ssre "^(?!(ab)de|x)(abd)(f)" (: bos (neg-look-ahead (or (: ($ (: #\a #\b)) #\d #\e) #\x)) ($ (: #\a #\b #\d)) ($ #\f))) +(test-ssre "^(?=(ab(cd)))(ab)" (: bos (look-ahead ($ (: #\a #\b ($ (: #\c #\d))))) ($ (: #\a #\b)))) +(test-ssre "^$" (: bos eos)) +(test-ssre "(?x)^ a\\ b[c ]d $" (: bos #\a #\space #\b (or #\c #\space) #\d eos)) +(test-ssre "^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$" (: bos ($ (: #\a ($ (: #\b ($ #\c))))) ($ (: #\d ($ (: #\e ($ #\f))))) ($ (: #\h ($ (: #\i ($ #\j))))) ($ (: #\k ($ (: #\l ($ #\m))))) eos)) +(test-ssre "^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$" (: bos (: #\a ($ (: #\b ($ #\c)))) (: #\d ($ (: #\e ($ #\f)))) (: #\h ($ (: #\i ($ #\j)))) (: #\k ($ (: #\l ($ #\m)))) eos)) +(test-ssre "^[.^$|()*+?{,}]+" (: bos (+ (or #\. #\^ #\$ #\| #\( #\) #\* #\+ #\? #\{ #\, #\})))) +(test-ssre "^a*\\w" (: bos (* #\a) (or alnum #\_))) +(test-ssre "^a*?\\w" (: bos (*? #\a) (or alnum #\_))) +(test-ssre "^a+\\w" (: bos (+ #\a) (or alnum #\_))) +(test-ssre "^a+?\\w" (: bos (**? 1 #f #\a) (or alnum #\_))) +(test-ssre "^\\d{8}\\w{2,}" (: bos (= 8 numeric) (>= 2 (or alnum #\_)))) +(test-ssre "^[aeiou\\d]{4,5}$" (: bos (** 4 5 (or #\a #\e #\i #\o #\u numeric)) eos)) +(test-ssre "^[aeiou\\d]{4,5}?" (: bos (**? 
4 5 (or #\a #\e #\i #\o #\u numeric)))) +(test-ssre "^12.34" (: bos #\1 #\2 nonl #\3 #\4)) +(test-ssre "foo(?!bar)(.*)" (: #\f #\o #\o (neg-look-ahead (: #\b #\a #\r)) ($ (* nonl)))) +(test-ssre "(?:(?!foo)...|^.{0,2})bar(.*)" (: (or (: (neg-look-ahead (: #\f #\o #\o)) nonl nonl nonl) (: bos (** 0 2 nonl))) #\b #\a #\r ($ (* nonl)))) +(test-ssre "^(\\D*)(?=\\d)(?!123)" (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) +(test-ssre "(?!^)abc" (: (neg-look-ahead bos) #\a #\b #\c)) +(test-ssre "(?=^)abc" (: (look-ahead bos) #\a #\b #\c)) +(test-ssre "ab{1,3}bc" (: #\a (** 1 3 #\b) #\b #\c)) +(test-ssre "([^.]*)\\.([^:]*):[T ]+(.*)" (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* nonl)))) +(test-ssre "^[W-c]+$" (: bos (+ (char-range #\W #\c)) eos)) +(test-ssre "^abc$" (: bos #\a #\b #\c eos)) +(test-ssre "^abc$" (: bos #\a #\b #\c eos)) +(test-ssre "\\Aabc\\Z" (: bos #\a #\b #\c (: (? #\newline) eos))) +(test-ssre "\\A(.)*\\Z" (: bos (* ($ nonl)) (: (? #\newline) eos))) +(test-ssre "(?:b)|(?::+)" (or #\b (+ #\:))) +(test-ssre "[-az]+" (+ (or #\- #\a #\z))) +(test-ssre "[az-]+" (+ (or #\a #\z #\-))) +(test-ssre "[a\\-z]+" (+ (or #\a #\- #\z))) +(test-ssre "[a-z]+" (+ (char-range #\a #\z))) +(test-ssre "[\\d-]+" (+ (or numeric #\-))) +(test-ssre "abc$" (: #\a #\b #\c eos)) +(test-ssre "a{0}bc" (: (= 0 #\a) #\b #\c)) +(test-ssre "(a|(bc)){0,0}?xyz" (: (**? 0 0 ($ (or #\a ($ (: #\b #\c))))) #\x #\y #\z)) +(test-ssre "[^a]" (~ #\a)) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "[^k]$" (: (~ #\k) eos)) +(test-ssre "[^k]{2,3}$" (: (** 2 3 (~ #\k)) eos)) +(test-ssre "^\\d{8,}@.+[^k]$" (: bos (>= 8 numeric) #\@ (+ nonl) (~ #\k) eos)) +(test-ssre "[^a]" (~ #\a)) +(test-ssre "[^az]" (~ (or #\a #\z))) +(test-ssre "P[^*]TAIRE[^*]{1,6}?LL" (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 6 (~ #\*)) #\L #\L)) +(test-ssre "P[^*]TAIRE[^*]{1,}?LL" (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 
1 #f (~ #\*)) #\L #\L)) +(test-ssre "(\\.\\d\\d[1-9]?)\\d+" (: ($ (: #\. numeric numeric (? (char-range #\1 #\9)))) (+ numeric))) +(test-ssre "(\\.\\d\\d((?=0)|\\d(?=\\d)))" ($ (: #\. numeric numeric ($ (or (look-ahead #\0) (: numeric (look-ahead numeric))))))) +(test-ssre "foo(.*)bar" (: #\f #\o #\o ($ (* nonl)) #\b #\a #\r)) +(test-ssre "foo(.*?)bar" (: #\f #\o #\o ($ (*? nonl)) #\b #\a #\r)) +(test-ssre "(.*)(\\d+)" (: ($ (* nonl)) ($ (+ numeric)))) +(test-ssre "(.*?)(\\d+)" (: ($ (*? nonl)) ($ (+ numeric)))) +(test-ssre "(.*)(\\d+)$" (: ($ (* nonl)) ($ (+ numeric)) eos)) +(test-ssre "(.*?)(\\d+)$" (: ($ (*? nonl)) ($ (+ numeric)) eos)) +(test-ssre "(.*)\\b(\\d+)$" (: ($ (* nonl)) (or bow eow) ($ (+ numeric)) eos)) +(test-ssre "(.*\\D)(\\d+)$" (: ($ (: (* nonl) (~ numeric))) ($ (+ numeric)) eos)) +(test-ssre "^\\D*(?!123)" (: bos (* (~ numeric)) (neg-look-ahead (: #\1 #\2 #\3)))) +(test-ssre "^(\\D*)(?=\\d)(?!123)" (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) +(test-ssre "^[W-\\]46]" (: bos (or (char-range #\W #\]) #\4 #\6))) +(test-ssre "word (?:[a-zA-Z0-9]+ ){0,10}otherword" (: #\w #\o #\r #\d #\space (** 0 10 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) +(test-ssre "word (?:[a-zA-Z0-9]+ ){0,300}otherword" (: #\w #\o #\r #\d #\space (** 0 300 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) +(test-ssre "^(a){0,0}" (: bos (= 0 ($ #\a)))) +(test-ssre "^(a){0,1}" (: bos (** 0 1 ($ #\a)))) +(test-ssre "^(a){0,2}" (: bos (** 0 2 ($ #\a)))) +(test-ssre "^(a){0,3}" (: bos (** 0 3 ($ #\a)))) +(test-ssre "^(a){0,}" (: bos (>= 0 ($ #\a)))) +(test-ssre "^(a){1,1}" (: bos (= 1 ($ #\a)))) +(test-ssre "^(a){1,2}" (: bos (** 1 2 ($ #\a)))) +(test-ssre "^(a){1,3}" (: bos (** 1 3 ($ #\a)))) +(test-ssre "^(a){1,}" (: bos (>= 1 ($ #\a)))) +(test-ssre ".*\\.gif" (: (* nonl) #\. 
#\g #\i #\f)) +(test-ssre ".{0,}\\.gif" (: (>= 0 nonl) #\. #\g #\i #\f)) +(test-ssre ".*\\.gif" (: (* nonl) #\. #\g #\i #\f)) +(test-ssre ".*\\.gif" (: (* nonl) #\. #\g #\i #\f)) +(test-ssre ".*\\.gif" (: (* nonl) #\. #\g #\i #\f)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre ".*$" (: (* nonl) eos)) +(test-ssre "(.*X|^B)" ($ (or (: (* nonl) #\X) (: bos #\B)))) +(test-ssre "(.*X|^B)" ($ (or (: (* nonl) #\X) (: bos #\B)))) +(test-ssre "(.*X|^B)" ($ (or (: (* nonl) #\X) (: bos #\B)))) +(test-ssre "(.*X|^B)" ($ (or (: (* nonl) #\X) (: bos #\B)))) +(test-ssre "^.*B" (: bos (* nonl) #\B)) +(test-ssre "(?m)^.*B" (: bol (* nonl) #\B)) +(test-ssre "^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" (: bos (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9))) +(test-ssre "^\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d" (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) +(test-ssre "^[\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d]" (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) +(test-ssre "^[abc]{12}" (: bos (= 12 (or #\a #\b #\c)))) +(test-ssre "^[a-c]{12}" (: bos (= 12 (char-range #\a #\c)))) +(test-ssre "^(a|b|c){12}" (: bos (= 12 ($ (or #\a #\b #\c))))) +(test-ssre "^[abcdefghijklmnopqrstuvwxy0123456789]" (: bos (or #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))) +(test-ssre "abcde{0,0}" (: #\a #\b #\c #\d (= 0 #\e))) +(test-ssre 
"ab[cd]{0,0}e" (: #\a #\b (= 0 (or #\c #\d)) #\e)) +(test-ssre "ab(c){0,0}d" (: #\a #\b (= 0 ($ #\c)) #\d)) +(test-ssre "a(b*)" (: #\a ($ (* #\b)))) +(test-ssre "ab\\d{0}e" (: #\a #\b (= 0 numeric) #\e)) +(test-ssre "\"([^\\\\\"]+|\\\\.)*\"" (: #\" (* ($ (or (+ (~ (or #\\ #\"))) (: #\\ nonl)))) #\")) +(test-ssre ".*?" (*? nonl)) +(test-ssre "\\b" (or bow eow)) +(test-ssre "\\b" (or bow eow)) +(test-ssre "a[^a]b" (: #\a (~ #\a) #\b)) +(test-ssre "a.b" (: #\a nonl #\b)) +(test-ssre "a[^a]b" (: #\a (~ #\a) #\b)) +(test-ssre "a.b" (: #\a nonl #\b)) +(test-ssre "^(b+?|a){1,2}?c" (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) +(test-ssre "^(b+|a){1,2}?c" (: bos (**? 1 2 ($ (or (+ #\b) #\a))) #\c)) +(test-ssre "(?!\\A)x" (: (neg-look-ahead bos) #\x)) +(test-ssre "(A|B)*CD" (: (* ($ (or #\A #\B))) #\C #\D)) +(test-ssre "(?= 0 #\b) #\b #\c)) +(test-ssre "ab+bc" (: #\a (+ #\b) #\b #\c)) +(test-ssre "ab+bc" (: #\a (+ #\b) #\b #\c)) +(test-ssre "ab{1,}bc" (: #\a (>= 1 #\b) #\b #\c)) +(test-ssre "ab{1,3}bc" (: #\a (** 1 3 #\b) #\b #\c)) +(test-ssre "ab{3,4}bc" (: #\a (** 3 4 #\b) #\b #\c)) +(test-ssre "ab{4,5}bc" (: #\a (** 4 5 #\b) #\b #\c)) +(test-ssre "ab?bc" (: #\a (? #\b) #\b #\c)) +(test-ssre "ab{0,1}bc" (: #\a (** 0 1 #\b) #\b #\c)) +(test-ssre "ab?bc" (: #\a (? #\b) #\b #\c)) +(test-ssre "ab?c" (: #\a (? 
#\b) #\c)) +(test-ssre "ab{0,1}c" (: #\a (** 0 1 #\b) #\c)) +(test-ssre "^abc$" (: bos #\a #\b #\c eos)) +(test-ssre "^abc" (: bos #\a #\b #\c)) +(test-ssre "^abc$" (: bos #\a #\b #\c eos)) +(test-ssre "abc$" (: #\a #\b #\c eos)) +(test-ssre "^" bos) +(test-ssre "$" eos) +(test-ssre "a.c" (: #\a nonl #\c)) +(test-ssre "a.*c" (: #\a (* nonl) #\c)) +(test-ssre "a[bc]d" (: #\a (or #\b #\c) #\d)) +(test-ssre "a[b-d]e" (: #\a (char-range #\b #\d) #\e)) +(test-ssre "a[b-d]" (: #\a (char-range #\b #\d))) +(test-ssre "a[-b]" (: #\a (or #\- #\b))) +(test-ssre "a[b-]" (: #\a (or #\b #\-))) +(test-ssre "a[]]b" (: #\a #\] #\b)) +(test-ssre "a[^bc]d" (: #\a (~ (or #\b #\c)) #\d)) +(test-ssre "a[^-b]c" (: #\a (~ (or #\- #\b)) #\c)) +(test-ssre "a[^]b]c" (: #\a (~ (or #\] #\b)) #\c)) +(test-ssre "\\ba\\b" (: (or bow eow) #\a (or bow eow))) +(test-ssre "\\by\\b" (: (or bow eow) #\y (or bow eow))) +(test-ssre "\\Ba\\B" (: nwb #\a nwb)) +(test-ssre "\\By\\b" (: nwb #\y (or bow eow))) +(test-ssre "\\by\\B" (: (or bow eow) #\y nwb)) +(test-ssre "\\By\\B" (: nwb #\y nwb)) +(test-ssre "\\w" (or alnum #\_)) +(test-ssre "\\W" (~ (or alnum #\_))) +(test-ssre "a\\sb" (: #\a space #\b)) +(test-ssre "a\\Sb" (: #\a (~ space) #\b)) +(test-ssre "\\d" numeric) +(test-ssre "\\D" (~ numeric)) +(test-ssre "ab|cd" (or (: #\a #\b) (: #\c #\d))) +(test-ssre "()ef" (: ($ (:)) #\e #\f)) +(test-ssre "$b" (: eos #\b)) +(test-ssre "a\\(b" (: #\a #\( #\b)) +(test-ssre "a\\(*b" (: #\a (* #\() #\b)) +(test-ssre "a\\\\b" (: #\a #\\ #\b)) +(test-ssre "((a))" ($ ($ #\a))) +(test-ssre "(a)b(c)" (: ($ #\a) #\b ($ #\c))) +(test-ssre "a+b+c" (: (+ #\a) (+ #\b) #\c)) +(test-ssre "a{1,}b{1,}c" (: (>= 1 #\a) (>= 1 #\b) #\c)) +(test-ssre "a.+?c" (: #\a (**? 1 #f nonl) #\c)) +(test-ssre "(a+|b)*" (* ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b){0,}" (>= 0 ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b)+" (+ ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b){1,}" (>= 1 ($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b)?" (? 
($ (or (+ #\a) #\b)))) +(test-ssre "(a+|b){0,1}" (** 0 1 ($ (or (+ #\a) #\b)))) +(test-ssre "[^ab]*" (* (~ (or #\a #\b)))) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "a*" (* #\a)) +(test-ssre "([abc])*d" (: (* ($ (or #\a #\b #\c))) #\d)) +(test-ssre "([abc])*bcd" (: (* ($ (or #\a #\b #\c))) #\b #\c #\d)) +(test-ssre "a|b|c|d|e" (or #\a #\b #\c #\d #\e)) +(test-ssre "(a|b|c|d|e)f" (: ($ (or #\a #\b #\c #\d #\e)) #\f)) +(test-ssre "abcd*efg" (: #\a #\b #\c (* #\d) #\e #\f #\g)) +(test-ssre "ab*" (: #\a (* #\b))) +(test-ssre "(ab|cd)e" (: ($ (or (: #\a #\b) (: #\c #\d))) #\e)) +(test-ssre "[abhgefdc]ij" (: (or #\a #\b #\h #\g #\e #\f #\d #\c) #\i #\j)) +(test-ssre "^(ab|cd)e" (: bos ($ (or (: #\a #\b) (: #\c #\d))) #\e)) +(test-ssre "(abc|)ef" (: ($ (or (: #\a #\b #\c) (:))) #\e #\f)) +(test-ssre "(a|b)c*d" (: ($ (or #\a #\b)) (* #\c) #\d)) +(test-ssre "(ab|ab*)bc" (: ($ (or (: #\a #\b) (: #\a (* #\b)))) #\b #\c)) +(test-ssre "a([bc]*)c*" (: #\a ($ (* (or #\b #\c))) (* #\c))) +(test-ssre "a([bc]*)(c*d)" (: #\a ($ (* (or #\b #\c))) ($ (: (* #\c) #\d)))) +(test-ssre "a([bc]+)(c*d)" (: #\a ($ (+ (or #\b #\c))) ($ (: (* #\c) #\d)))) +(test-ssre "a([bc]*)(c+d)" (: #\a ($ (* (or #\b #\c))) ($ (: (+ #\c) #\d)))) +(test-ssre "a[bcd]*dcdcde" (: #\a (* (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) +(test-ssre "a[bcd]+dcdcde" (: #\a (+ (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) +(test-ssre "(ab|a)b*c" (: ($ (or (: #\a #\b) #\a)) (* #\b) #\c)) +(test-ssre "((a)(b)c)(d)" (: ($ (: ($ #\a) ($ #\b) #\c)) ($ #\d))) +(test-ssre "[a-zA-Z_][a-zA-Z0-9_]*" (: (or (char-range #\a #\z) (char-range #\A #\Z) #\_) (* (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9) #\_)))) +(test-ssre "^a(bc+|b[eh])g|.h$" (or (: bos #\a ($ (or (: #\b (+ #\c)) (: #\b (or #\e #\h)))) #\g) (: nonl #\h eos))) +(test-ssre "(bc+d$|ef*g.|h?i(j|k))" ($ (or (: #\b (+ #\c) #\d eos) (: #\e (* #\f) #\g nonl) (: (? 
#\h) #\i ($ (or #\j #\k)))))) +(test-ssre "((((((((((a))))))))))" ($ ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a))))))))))) +(test-ssre "(((((((((a)))))))))" ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a)))))))))) +(test-ssre "multiple words of text" (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s #\space #\o #\f #\space #\t #\e #\x #\t)) +(test-ssre "multiple words" (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s)) +(test-ssre "(.*)c(.*)" (: ($ (* nonl)) #\c ($ (* nonl)))) +(test-ssre "\\((.*), (.*)\\)" (: #\( ($ (* nonl)) #\, #\space ($ (* nonl)) #\))) +(test-ssre "[k]" #\k) +(test-ssre "abcd" (: #\a #\b #\c #\d)) +(test-ssre "a(bc)d" (: #\a ($ (: #\b #\c)) #\d)) +(test-ssre "a[-]?c" (: #\a (? #\-) #\c)) +(test-ssre "a(?!b)." (: #\a (neg-look-ahead #\b) nonl)) +(test-ssre "a(?=d)." (: #\a (look-ahead #\d) nonl)) +(test-ssre "a(?=c|d)." (: #\a (look-ahead (or #\c #\d)) nonl)) +(test-ssre "a(?:b|c|d)(.)" (: #\a (or #\b #\c #\d) ($ nonl))) +(test-ssre "a(?:b|c|d)*(.)" (: #\a (* (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d)+?(.)" (: #\a (**? 1 #f (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d)+(.)" (: #\a (+ (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){2}(.)" (: #\a (= 2 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){4,5}(.)" (: #\a (** 4 5 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){4,5}?(.)" (: #\a (**? 4 5 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "((foo)|(bar))*" (* ($ (or ($ (: #\f #\o #\o)) ($ (: #\b #\a #\r)))))) +(test-ssre "a(?:b|c|d){6,7}(.)" (: #\a (** 6 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){6,7}?(.)" (: #\a (**? 6 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,6}(.)" (: #\a (** 5 6 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,6}?(.)" (: #\a (**? 5 6 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,7}(.)" (: #\a (** 5 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|c|d){5,7}?(.)" (: #\a (**? 5 7 (or #\b #\c #\d)) ($ nonl))) +(test-ssre "a(?:b|(c|e){1,2}?|d)+?(.)" (: #\a (**? 
1 #f (or #\b (**? 1 2 ($ (or #\c #\e))) #\d)) ($ nonl))) +(test-ssre "^(.+)?B" (: bos (? ($ (+ nonl))) #\B)) +(test-ssre "^([^a-z])|(\\^)$" (or (: bos ($ (~ (char-range #\a #\z)))) (: ($ #\^) eos))) +(test-ssre "^[<>]&" (: bos (or #\< #\>) #\&)) +(test-ssre "(?:(f)(o)(o)|(b)(a)(r))*" (* (or (: ($ #\f) ($ #\o) ($ #\o)) (: ($ #\b) ($ #\a) ($ #\r))))) +(test-ssre "(?<=a)b" (: (look-behind #\a) #\b)) +(test-ssre "(?a+)ab" (: ($ (: #\> (+ #\a))) #\a #\b)) +(test-ssre "a\\z" (: #\a eos)) +(test-ssre "(?<=\\d{3}(?!999))foo" (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)))) #\f #\o #\o)) +(test-ssre "(?<=(?!...999)\\d{3})foo" (: (look-behind (: (neg-look-ahead (: nonl nonl nonl #\9 #\9 #\9)) (= 3 numeric))) #\f #\o #\o)) +(test-ssre "(?<=\\d{3}(?!999)...)foo" (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)) nonl nonl nonl)) #\f #\o #\o)) +(test-ssre "(?<=\\d{3}...)(?= 2 (or #\a #\b))) +(test-ssre "[ab]{2,}?" (**? 2 #f (or #\a #\b))) +(test-ssre "abc(?=xyz)" (: #\a #\b #\c (look-ahead (: #\x #\y #\z)))) +(test-ssre "(?<=pqr)abc(?=xyz)" (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z)))) +(test-ssre "a\\b" (: #\a (or bow eow))) +(test-ssre "abc(?=abcde)(?=ab)" (: #\a #\b #\c (look-ahead (: #\a #\b #\c #\d #\e)) (look-ahead (: #\a #\b)))) +(test-ssre "a*?b*?" (: (*? #\a) (*? #\b))) +(test-ssre "(a)(b)|(c)" (or (: ($ #\a) ($ #\b)) ($ #\c))) +(test-ssre "(?aa)" (-> A (: #\a #\a))) +(test-ssre "a(b)c(d)" (: #\a ($ #\b) #\c ($ #\d))) +(test-ssre "^" bos) +(test-ssre "(02-)?[0-9]{3}-[0-9]{3}" (: (? 
($ (: #\0 #\2 #\-))) (= 3 (char-range #\0 #\9)) #\- (= 3 (char-range #\0 #\9)))) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "abc" (: #\a #\b #\c)) +(test-ssre "abc|bcd" (or (: #\a #\b #\c) (: #\b #\c #\d))) +(test-ssre "(?<=abc|)" (look-behind (or (: #\a #\b #\c) (:)))) +(test-ssre "(?<=abc|)" (look-behind (or (: #\a #\b #\c) (:)))) +(test-ssre "(?<=|abc)" (look-behind (or (:) (: #\a #\b #\c)))) +(test-ssre "[abc]" (or #\a #\b #\c)) +(test-ssre "foobar" (: #\f #\o #\o #\b #\a #\r)) +(test-ssre "foobar" (: #\f #\o #\o #\b #\a #\r)) +(test-ssre "(?<=pqr)abc(?=xyz)" (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z)))) +(test-ssre "\\z" eos) +(test-ssre "\\Z" (: (? #\newline) eos)) +(test-ssre "(?<=(?=.(?<=x)))" (look-behind (look-ahead (: nonl (look-behind #\x))))) +(test-ssre "(?![ab]).*" (: (neg-look-ahead (or #\a #\b)) (* nonl))) +(test-ssre "[a[]" (or #\a #\[)) +(test-ssre "\\bX" (: (or bow eow) #\X)) +(test-ssre "\\BX" (: nwb #\X)) +(test-ssre "X\\b" (: #\X (or bow eow))) +(test-ssre "X\\B" (: #\X nwb)) +(test-ssre "[^a]" (~ #\a)) +(test-ssre "a.b" (: #\a nonl #\b)) +(test-ssre "a(.{3})b" (: #\a ($ (= 3 nonl)) #\b)) +(test-ssre "a(.*?)(.)" (: #\a ($ (*? nonl)) ($ nonl))) +(test-ssre "a(.*?)(.)" (: #\a ($ (*? nonl)) ($ nonl))) +(test-ssre "a(.*)(.)" (: #\a ($ (* nonl)) ($ nonl))) +(test-ssre "a(.*)(.)" (: #\a ($ (* nonl)) ($ nonl))) +(test-ssre "a(.)(.)" (: #\a ($ nonl) ($ nonl))) +(test-ssre "a(.)(.)" (: #\a ($ nonl) ($ nonl))) +(test-ssre "a(.?)(.)" (: #\a ($ (? nonl)) ($ nonl))) +(test-ssre "a(.?)(.)" (: #\a ($ (? nonl)) ($ nonl))) +(test-ssre "a(.??)(.)" (: #\a ($ (?? nonl)) ($ nonl))) +(test-ssre "a(.??)(.)" (: #\a ($ (?? nonl)) ($ nonl))) +(test-ssre "a(.{3})b" (: #\a ($ (= 3 nonl)) #\b)) +(test-ssre "a(.{3,})b" (: #\a ($ (>= 3 nonl)) #\b)) +(test-ssre "a(.{3,}?)b" (: #\a ($ (**? 3 #f nonl)) #\b)) +(test-ssre "a(.{3,5})b" (: #\a ($ (** 3 5 nonl)) #\b)) +(test-ssre "a(.{3,5}?)b" (: #\a ($ (**? 
3 5 nonl)) #\b)) +(test-ssre "(?<=aXb)cd" (: (look-behind (: #\a #\X #\b)) #\c #\d)) +(test-ssre "(?<=(.))X" (: (look-behind ($ nonl)) #\X)) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "^[^a]{2}" (: bos (= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}" (: bos (>= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}?" (: bos (**? 2 #f (~ #\a)))) +(test-ssre "[^a]+" (+ (~ #\a))) +(test-ssre "^[^a]{2}" (: bos (= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}" (: bos (>= 2 (~ #\a)))) +(test-ssre "^[^a]{2,}?" (: bos (**? 2 #f (~ #\a)))) +(test-ssre "\\D" (~ numeric)) +(test-ssre ">\\S" (: #\> (~ space))) +(test-ssre "\\d" numeric) +(test-ssre "\\s" space) +(test-ssre "\\D+" (+ (~ numeric))) +(test-ssre "\\D{2,3}" (** 2 3 (~ numeric))) +(test-ssre "\\D{2,3}?" (**? 2 3 (~ numeric))) +(test-ssre "\\d+" (+ numeric)) +(test-ssre "\\d{2,3}" (** 2 3 numeric)) +(test-ssre "\\d{2,3}?" (**? 2 3 numeric)) +(test-ssre "\\S+" (+ (~ space))) +(test-ssre "\\S{2,3}" (** 2 3 (~ space))) +(test-ssre "\\S{2,3}?" (**? 2 3 (~ space))) +(test-ssre ">\\s+<" (: #\> (+ space) #\<)) +(test-ssre ">\\s{2,3}<" (: #\> (** 2 3 space) #\<)) +(test-ssre ">\\s{2,3}?<" (: #\> (**? 2 3 space) #\<)) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\w{2,3}" (** 2 3 (or alnum #\_))) +(test-ssre "\\w{2,3}?" (**? 2 3 (or alnum #\_))) +(test-ssre "\\W+" (+ (~ (or alnum #\_)))) +(test-ssre "\\W{2,3}" (** 2 3 (~ (or alnum #\_)))) +(test-ssre "\\W{2,3}?" (**? 
2 3 (~ (or alnum #\_)))) +(test-ssre "^[ac]*b" (: bos (* (or #\a #\c)) #\b)) +(test-ssre "^[^x]*b" (: bos (* (~ #\x)) #\b)) +(test-ssre "^[^x]*b" (: bos (* (~ #\x)) #\b)) +(test-ssre "^\\d*b" (: bos (* numeric) #\b)) +(test-ssre "(|a)" ($ (or (:) #\a))) +(test-ssre "abcd*" (: #\a #\b #\c (* #\d))) +(test-ssre "abcd*" (: #\a #\b #\c (* #\d))) +(test-ssre "abc\\d*" (: #\a #\b #\c (* numeric))) +(test-ssre "abc[de]*" (: #\a #\b #\c (* (or #\d #\e)))) +(test-ssre "\\bthe cat\\b" (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow))) +(test-ssre "[\\p{Nd}]" numeric) +(test-ssre "[\\p{Nd}+-]+" (+ (or numeric #\+ #\-))) +(test-ssre "[\\P{Nd}]+" (+ (~ numeric))) +(test-ssre "^[\\p{Vowel}]" (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) +(test-ssre "^[\\p{Any}]X" (: bos any #\X)) +(test-ssre "^[\\P{Any}]X" (: bos (~ any) #\X)) +(test-ssre "^[\\p{Any}]?X" (: bos (? any) #\X)) +(test-ssre "[.\\p{Lu}][.\\p{Ll}][.\\P{Lu}][.\\P{Ll}]" (: (or #\. upper) (or #\. lower) (or #\. (~ upper)) (or #\. (~ lower)))) +(test-ssre "[\\p{L}]" alpha) +(test-ssre "[\\P{L}]" (~ alpha)) +(test-ssre "[\\pLu]" (or alpha #\u)) +(test-ssre "[\\PLu]" (or (~ alpha) #\u)) +(test-ssre "\\p{Nd}" numeric) +(test-ssre "\\p{Nd}+" (+ numeric)) +(test-ssre "\\P{Nd}+" (+ (~ numeric))) +(test-ssre "^\\p{Vowel}" (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) +(test-ssre "^\\p{Any}X" (: bos any #\X)) +(test-ssre "^\\P{Any}X" (: bos (~ any) #\X)) +(test-ssre "^\\p{Any}?X" (: bos (? any) #\X)) +(test-ssre "\\p{L}" alpha) +(test-ssre "\\P{L}" (~ alpha)) +(test-ssre "\\pLu" (: alpha #\u)) +(test-ssre "\\PLu" (: (~ alpha) #\u)) +(test-ssre "\\b...\\B" (: (or bow eow) nonl nonl nonl nwb)) +(test-ssre "\\b...\\B" (: (or bow eow) nonl nonl nonl nwb)) +(test-ssre "\\b...\\B" (: (or bow eow) nonl nonl nonl nwb)) +(test-ssre "ist" (: #\i #\s #\t)) +(test-ssre "is+t" (: #\i (+ #\s) #\t)) +(test-ssre "is+?t" (: #\i (**? 1 #f #\s) #\t)) +(test-ssre "is?t" (: #\i (? 
#\s) #\t)) +(test-ssre "is{2}t" (: #\i (= 2 #\s) #\t)) +(test-ssre "^A\\s+Z" (: bos #\A (+ space) #\Z)) +(test-ssre "AskZ" (: #\A #\s #\k #\Z)) +(test-ssre "[AskZ]+" (+ (or #\A #\s #\k #\Z))) +(test-ssre "[^s]+" (+ (~ #\s))) +(test-ssre "[^s]+" (+ (~ #\s))) +(test-ssre "[^k]+" (+ (~ #\k))) +(test-ssre "[^k]+" (+ (~ #\k))) +(test-ssre "[^sk]+" (+ (~ (or #\s #\k)))) +(test-ssre "[^sk]+" (+ (~ (or #\s #\k)))) +(test-ssre "i" #\i) +(test-ssre "I" #\I) +(test-ssre "[i]" #\i) +(test-ssre "[zi]" (or #\z #\i)) +(test-ssre "[iI]" (or #\i #\I)) +(test-ssre "\\d+" (+ numeric)) +(test-ssre "\\d+" (+ numeric)) +(test-ssre ">\\s+<" (: #\> (+ space) #\<)) +(test-ssre ">\\s+<" (: #\> (+ space) #\<)) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\w+" (+ (or alnum #\_))) +(test-ssre "\\b.+?\\b" (: (or bow eow) (**? 1 #f nonl) (or bow eow))) +(test-ssre "caf\\B.+?\\B" (: #\c #\a #\f nwb (**? 1 #f nonl) nwb)) +(test-ssre "x{1,3}+" (+ (** 1 3 #\x))) +(test-ssre "[a]" #\a) +(test-ssre "[^a]" (~ #\a)) +(test-ssre "(?<=C\n)^" (: (look-behind (: #\C #\newline)) bos)) +(test-ssre "\\w+(?=\t)" (: (+ (or alnum #\_)) (look-ahead #\tab))) + +; new set notation tests -; add some random definitions to pass the tests -(string-sre-definitions - (string-sre-bind 'Any 'cset 'any - (string-sre-bind 'Nd 'cset 'numeric - (string-sre-bind 'vowel 'cset '(or #\a #\e #\i #\o #\u #\y #\w) - (string-sre-bind 'Vowel 'cset '(or #\A #\E #\I #\O #\U #\Y #\W) - (string-sre-bind 'L 'cset 'alpha - (string-sre-bind 'Ll 'cset 'lower - (string-sre-bind 'Lu 'cset 'upper - (string-sre-definitions))))))))) +(test-ssre "{Nd}" numeric) +(test-ssre "{Nd|[+]|[-]}+" (+ (or numeric #\+ #\-))) +(test-ssre "{~Nd}+" (+ (~ numeric))) +(test-ssre "^{Vowel}" (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) +(test-ssre "^{Any}X" (: bos any #\X)) +(test-ssre "^{~Any}X" (: bos (~ any) #\X)) +(test-ssre "^{Any}?X" (: bos (? any) #\X)) +(test-ssre "{[.]|Lu}{[.]|Ll}{[.]|~Lu}{[.]|~Ll}" (: (or #\. 
upper) (or #\. lower) (or #\. (~ upper)) (or #\. (~ lower)))) +(test-ssre "{L}" alpha) +(test-ssre "{~L}" (~ alpha)) +(test-ssre "{L}u" (: alpha #\u)) +(test-ssre "{~L}u" (: (~ alpha) #\u)) +(test-ssre "{L-Vowel}u" (: (- alpha (or #\A #\E #\I #\O #\U #\Y #\W)) #\u)) +(test-ssre "{Nd}" numeric) +(test-ssre "{Nd}+" (+ numeric)) +(test-ssre "{~Nd}+" (+ (~ numeric))) +(test-ssre "^{Vowel}" (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) +(test-ssre "^{Any}X" (: bos any #\X)) +(test-ssre "^{~Any}X" (: bos (~ any) #\X)) +(test-ssre "^{Any}?X" (: bos (? any) #\X)) +(test-ssre "{u}{l|d}*" (: upper (* (or lower numeric)))) +(test-ssre "{~d}{an|[']}*" (: (~ numeric) (* (or alnum #\')))) +(test-ssre "{<}{u&~Vowel|d}{!b}{an-d}*{>}" (: bow (or (- upper (or #\A #\E #\I #\O #\U #\Y #\W)) numeric) nwb (* (- alnum numeric)) eow)) +(test-ssre "{}\\X*" (: grapheme (* grapheme))) +; selected corner cases +(test-ssre "a{}b" (: #\a (or) #\b)) +(test-ssre "a{{}}b" (: #\a (or) #\b)) +(test-ssre "a{{}|{}|{}}b" (: #\a (or) #\b)) +(test-ssre "a{{}&{}}b" (: #\a (or) #\b)) +(test-ssre "a{{}&{{}-{}-{}}}b" (: #\a (or) #\b)) +(test-ssre "{?i-u:?u:an&{l|d}}" (w/nocase (w/ascii (w/unicode (and alnum (or lower numeric)))))) +(test-ssre "(?x){ ?i-u: ?u: an & { l | d } }" (w/nocase (w/ascii (w/unicode (and alnum (or lower numeric)))))) +(test-ssre "(?x){ ?i-u: ?u: an & { l | d } }" (w/nocase (w/ascii (w/unicode (and alnum (or lower numeric)))))) +(test-ssre "{?x: an & { ?i-u: l | d } }" (and alnum (w/nocase (w/ascii (or lower numeric))))) +(test-ssre "{an&{?i:{?-u:l|d}}}" (and alnum (w/nocase (w/ascii (or lower numeric))))) +(test-ssre "(?i)(?-u){l|d}" (w/nocase (w/ascii (or lower numeric)))) -(test-ssre "the quick brown fox" () (: #\t #\h #\e #\space #\q #\u #\i #\c #\k #\space #\b #\r #\o #\w #\n #\space #\f #\o #\x)) -(test-ssre "a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz" () (: (* #\a) #\a #\b (? 
#\c) #\x #\y (+ #\z) #\p #\q (= 3 #\r) #\a (>= 2 #\b) #\x (** 4 5 #\y) #\p (** 0 6 #\q) #\A (>= 0 #\B) #\z #\z)) -(test-ssre "^(abc){1,2}zz" () (: bos (** 1 2 ($ (: #\a #\b #\c))) #\z #\z)) -(test-ssre "^(b+?|a){1,2}?c" () (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) -(test-ssre "^(b+|a){1,2}c" () (: bos (** 1 2 ($ (or (+ #\b) #\a))) #\c)) -(test-ssre "^(ba|b*){1,2}?bc" () (: bos (**? 1 2 ($ (or (: #\b #\a) (* #\b)))) #\b #\c)) -(test-ssre "^[ab\\]cde]" () (: bos (or #\a #\b #\] #\c #\d #\e))) -(test-ssre "^[]cde]" () (: bos (or #\] #\c #\d #\e))) -(test-ssre "^[^ab\\]cde]" () (: bos (~ (or #\a #\b #\] #\c #\d #\e)))) -(test-ssre "^[^]cde]" () (: bos (~ (or #\] #\c #\d #\e)))) -(test-ssre "^@" () (: bos #\@)) -(test-ssre "^[0-9]+$" () (: bos (+ (char-range #\0 #\9)) eos)) -(test-ssre "^.*nter" () (: bos (* nonl) #\n #\t #\e #\r)) -(test-ssre "^xxx[0-9]+$" () (: bos #\x #\x #\x (+ (char-range #\0 #\9)) eos)) -(test-ssre "^.+[0-9][0-9][0-9]$" () (: bos (+ nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) -(test-ssre "^.+?[0-9][0-9][0-9]$" () (: bos (**? 1 #f nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) -(test-ssre "^([^!]+)!(.+)=apquxz\\.ixr\\.zzz\\.ac\\.uk$" () (: bos ($ (+ (~ #\!))) #\! ($ (+ nonl)) #\= #\a #\p #\q #\u #\x #\z #\. #\i #\x #\r #\. #\z #\z #\z #\. #\a #\c #\. #\u #\k eos)) -(test-ssre ":" () #\:) -(test-ssre "([\\da-f:]+)$" () (: ($ (+ (or numeric (char-range #\a #\f) #\:))) eos)) -(test-ssre "^.*\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$" () (: bos (* nonl) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) #\. 
($ (** 1 3 numeric)) eos)) -(test-ssre "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$" () (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos)) -(test-ssre "^[a-zA-Z\\d][a-zA-Z\\d\\-]*(\\.[a-zA-Z\\d][a-zA-Z\\d\\-]*)*\\.$" () (: bos (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-)) (* ($ (: #\. (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-))))) #\. eos)) -(test-ssre "^\\*\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?(\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?)*$" () (: bos #\* #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric))))) (* ($ (: #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric)))))))) eos)) -(test-ssre "^(?=ab(de))(abd)(e)" () (: bos (look-ahead (: #\a #\b ($ (: #\d #\e)))) ($ (: #\a #\b #\d)) ($ #\e))) -(test-ssre "^(?!(ab)de|x)(abd)(f)" () (: bos (neg-look-ahead (or (: ($ (: #\a #\b)) #\d #\e) #\x)) ($ (: #\a #\b #\d)) ($ #\f))) -(test-ssre "^(?=(ab(cd)))(ab)" () (: bos (look-ahead ($ (: #\a #\b ($ (: #\c #\d))))) ($ (: #\a #\b)))) -(test-ssre "^[\\da-f](\\.[\\da-f])*$" () (: bos (or numeric (char-range #\a #\f)) (* ($ (: #\. (or numeric (char-range #\a #\f))))) eos)) -(test-ssre "^\".*\"\\s*(;.*)?$" () (: bos #\" (* nonl) #\" (* space) (? 
($ (: #\; (* nonl)))) eos)) -(test-ssre "^$" () (: bos eos)) -(test-ssre "^ a\\ b[c ]d $" (x) (: bos #\a #\space #\b (or #\c #\space) #\d eos)) -(test-ssre "^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$" () (: bos ($ (: #\a ($ (: #\b ($ #\c))))) ($ (: #\d ($ (: #\e ($ #\f))))) ($ (: #\h ($ (: #\i ($ #\j))))) ($ (: #\k ($ (: #\l ($ #\m))))) eos)) -(test-ssre "^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$" () (: bos (: #\a ($ (: #\b ($ #\c)))) (: #\d ($ (: #\e ($ #\f)))) (: #\h ($ (: #\i ($ #\j)))) (: #\k ($ (: #\l ($ #\m)))) eos)) -(test-ssre "^[\\w][\\W][\\s][\\S][\\d][\\D]\\]" () (: bos (or alnum #\_) (~ (or alnum #\_)) space (~ space) numeric (~ numeric) #\])) -(test-ssre "^[.^$|()*+?{,}]+" () (: bos (+ (or #\. #\^ #\$ #\| #\( #\) #\* #\+ #\? #\{ #\, #\})))) -(test-ssre "^a*\\w" () (: bos (* #\a) (or alnum #\_))) -(test-ssre "^a*?\\w" () (: bos (*? #\a) (or alnum #\_))) -(test-ssre "^a+\\w" () (: bos (+ #\a) (or alnum #\_))) -(test-ssre "^a+?\\w" () (: bos (**? 1 #f #\a) (or alnum #\_))) -(test-ssre "^\\d{8}\\w{2,}" () (: bos (= 8 numeric) (>= 2 (or alnum #\_)))) -(test-ssre "^[aeiou\\d]{4,5}$" () (: bos (** 4 5 (or #\a #\e #\i #\o #\u numeric)) eos)) -(test-ssre "^[aeiou\\d]{4,5}?" () (: bos (**? 4 5 (or #\a #\e #\i #\o #\u numeric)))) -(test-ssre "^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]" () (: bos #\F #\r #\o #\m (+ #\space) ($ (+ (~ #\space))) (+ #\space) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (+ #\space) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (+ #\space) (? 
(char-range #\0 #\9)) (char-range #\0 #\9) (+ #\space) (char-range #\0 #\9) (char-range #\0 #\9) #\: (char-range #\0 #\9) (char-range #\0 #\9))) -(test-ssre "^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" () (: bos #\F #\r #\o #\m (+ space) (+ (~ space)) (+ space) (= 2 ($ (: (= 3 (or (char-range #\a #\z) (char-range #\A #\Z))) (+ space)))) (** 1 2 numeric) (+ space) numeric numeric #\: numeric numeric)) -(test-ssre "^12.34" () (: bos #\1 #\2 nonl #\3 #\4)) -(test-ssre "foo(?!bar)(.*)" () (: #\f #\o #\o (neg-look-ahead (: #\b #\a #\r)) ($ (* nonl)))) -(test-ssre "(?:(?!foo)...|^.{0,2})bar(.*)" () (: (or (: (neg-look-ahead (: #\f #\o #\o)) nonl nonl nonl) (: bos (** 0 2 nonl))) #\b #\a #\r ($ (* nonl)))) -(test-ssre "^(\\D*)(?=\\d)(?!123)" () (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) -(test-ssre "(?!^)abc" () (: (neg-look-ahead bos) #\a #\b #\c)) -(test-ssre "(?=^)abc" () (: (look-ahead bos) #\a #\b #\c)) -(test-ssre "^[ab]{1,3}(ab*|b)" () (: bos (** 1 3 (or #\a #\b)) ($ (or (: #\a (* #\b)) #\b)))) -(test-ssre "^[ab]{1,3}?(ab*|b)" () (: bos (**? 1 3 (or #\a #\b)) ($ (or (: #\a (* #\b)) #\b)))) -(test-ssre "^[ab]{1,3}?(ab*?|b)" () (: bos (**? 1 3 (or #\a #\b)) ($ (or (: #\a (*? #\b)) #\b)))) -(test-ssre "^[ab]{1,3}(ab*?|b)" () (: bos (** 1 3 (or #\a #\b)) ($ (or (: #\a (*? #\b)) #\b)))) -(test-ssre "ab{1,3}bc" () (: #\a (** 1 3 #\b) #\b #\c)) -(test-ssre "([^.]*)\\.([^:]*):[T ]+(.*)" () (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* nonl)))) -(test-ssre "^[W-c]+$" () (: bos (+ (char-range #\W #\c)) eos)) -(test-ssre "^[?-_]+$" () (: bos (+ (char-range #\? 
#\_)) eos)) -(test-ssre "^abc$" () (: bos #\a #\b #\c eos)) -(test-ssre "\\Aabc\\z" () (: bos #\a #\b #\c eos)) -(test-ssre "\\A(.)*\\z" () (: bos (* ($ nonl)) eos)) -(test-ssre "(?:b)|(?::+)" () (or #\b (+ #\:))) -(test-ssre "[-az]+" () (+ (or #\- #\a #\z))) -(test-ssre "[az-]+" () (+ (or #\a #\z #\-))) -(test-ssre "[a\\-z]+" () (+ (or #\a #\- #\z))) -(test-ssre "[a-z]+" () (+ (char-range #\a #\z))) -(test-ssre "[\\d-]+" () (+ (or numeric #\-))) -(test-ssre "\\\\" () #\\) -(test-ssre "a{0}bc" () (: (= 0 #\a) #\b #\c)) -(test-ssre "(a|(bc)){0,0}?xyz" () (: (**? 0 0 ($ (or #\a ($ (: #\b #\c))))) #\x #\y #\z)) -(test-ssre "^([^a])([^b])([^c]*)([^d]{3,4})" () (: bos ($ (~ #\a)) ($ (~ #\b)) ($ (* (~ #\c))) ($ (** 3 4 (~ #\d))))) -(test-ssre "[^a]" () (~ #\a)) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "[^k]$" () (: (~ #\k) eos)) -(test-ssre "[^k]{2,3}$" () (: (** 2 3 (~ #\k)) eos)) -(test-ssre "^\\d{8,}@.+[^k]$" () (: bos (>= 8 numeric) #\@ (+ nonl) (~ #\k) eos)) -(test-ssre "[^a]" () (~ #\a)) -(test-ssre "[^az]" () (~ (or #\a #\z))) -(test-ssre "P[^*]TAIRE[^*]{1,6}?LL" () (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 6 (~ #\*)) #\L #\L)) -(test-ssre "P[^*]TAIRE[^*]{1,}?LL" () (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 #f (~ #\*)) #\L #\L)) -(test-ssre "(\\.\\d\\d[1-9]?)\\d+" () (: ($ (: #\. numeric numeric (? (char-range #\1 #\9)))) (+ numeric))) -(test-ssre "(\\.\\d\\d((?=0)|\\d(?=\\d)))" () ($ (: #\. numeric numeric ($ (or (look-ahead #\0) (: numeric (look-ahead numeric))))))) -(test-ssre "\\b(foo)\\s+(\\w+)" () (: (or bow eow) ($ (: #\f #\o #\o)) (+ space) ($ (+ (or alnum #\_))))) -(test-ssre "foo(.*)bar" () (: #\f #\o #\o ($ (* nonl)) #\b #\a #\r)) -(test-ssre "foo(.*?)bar" () (: #\f #\o #\o ($ (*? nonl)) #\b #\a #\r)) -(test-ssre "(.*)(\\d*)" () (: ($ (* nonl)) ($ (* numeric)))) -(test-ssre "(.*)(\\d+)" () (: ($ (* nonl)) ($ (+ numeric)))) -(test-ssre "(.*?)(\\d*)" () (: ($ (*? 
nonl)) ($ (* numeric)))) -(test-ssre "(.*?)(\\d+)" () (: ($ (*? nonl)) ($ (+ numeric)))) -(test-ssre "(.*)(\\d+)$" () (: ($ (* nonl)) ($ (+ numeric)) eos)) -(test-ssre "(.*?)(\\d+)$" () (: ($ (*? nonl)) ($ (+ numeric)) eos)) -(test-ssre "(.*)\\b(\\d+)$" () (: ($ (* nonl)) (or bow eow) ($ (+ numeric)) eos)) -(test-ssre "(.*\\D)(\\d+)$" () (: ($ (: (* nonl) (~ numeric))) ($ (+ numeric)) eos)) -(test-ssre "^\\D*(?!123)" () (: bos (* (~ numeric)) (neg-look-ahead (: #\1 #\2 #\3)))) -(test-ssre "^(\\D*)(?=\\d)(?!123)" () (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) -(test-ssre "^[W-]46\\]" () (: bos (or #\W #\-) #\4 #\6 #\])) -(test-ssre "^[W-\\]46]" () (: bos (or (char-range #\W #\]) #\4 #\6))) -(test-ssre "word (?:[a-zA-Z0-9]+ ){0,10}otherword" () (: #\w #\o #\r #\d #\space (** 0 10 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) -(test-ssre "word (?:[a-zA-Z0-9]+ ){0,300}otherword" () (: #\w #\o #\r #\d #\space (** 0 300 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) -(test-ssre "^(a){0,0}" () (: bos (= 0 ($ #\a)))) -(test-ssre "^(a){0,1}" () (: bos (** 0 1 ($ #\a)))) -(test-ssre "^(a){0,2}" () (: bos (** 0 2 ($ #\a)))) -(test-ssre "^(a){0,3}" () (: bos (** 0 3 ($ #\a)))) -(test-ssre "^(a){0,}" () (: bos (>= 0 ($ #\a)))) -(test-ssre "^(a){1,1}" () (: bos (= 1 ($ #\a)))) -(test-ssre "^(a){1,2}" () (: bos (** 1 2 ($ #\a)))) -(test-ssre "^(a){1,3}" () (: bos (** 1 3 ($ #\a)))) -(test-ssre "^(a){1,}" () (: bos (>= 1 ($ #\a)))) -(test-ssre ".*\\.gif" () (: (* nonl) #\. #\g #\i #\f)) -(test-ssre ".{0,}\\.gif" () (: (>= 0 nonl) #\. #\g #\i #\f)) -(test-ssre ".*\\.gif" () (: (* nonl) #\. #\g #\i #\f)) -(test-ssre ".*\\.gif" () (: (* nonl) #\. #\g #\i #\f)) -(test-ssre ".*\\.gif" () (: (* nonl) #\. 
#\g #\i #\f)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre "(.*X|^B)" () ($ (or (: (* nonl) #\X) (: bos #\B)))) -(test-ssre "^.*B" () (: bos (* nonl) #\B)) -(test-ssre "(?m)^.*B" () (: bol (* nonl) #\B)) -(test-ssre "^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" () (: bos (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9))) -(test-ssre "^\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d" () (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) -(test-ssre "^[\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d]" () (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) -(test-ssre "^[abc]{12}" () (: bos (= 12 (or #\a #\b #\c)))) -(test-ssre "^[a-c]{12}" () (: bos (= 12 (char-range #\a #\c)))) -(test-ssre "^(a|b|c){12}" () (: bos (= 12 ($ (or #\a #\b #\c))))) -(test-ssre "^[abcdefghijklmnopqrstuvwxy0123456789]" () (: bos (or #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))) -(test-ssre "abcde{0,0}" () (: #\a #\b #\c #\d (= 0 #\e))) -(test-ssre "ab[cd]{0,0}e" () (: #\a #\b (= 0 (or #\c #\d)) #\e)) -(test-ssre "ab(c){0,0}d" () (: #\a #\b (= 0 ($ #\c)) #\d)) -(test-ssre "a(b*)" () (: #\a ($ (* #\b)))) -(test-ssre "ab\\d{0}e" () (: #\a #\b (= 0 numeric) #\e)) -(test-ssre "\"([^\\\\\"]+|\\\\.)*\"" () (: #\" (* ($ (or (+ (~ (or #\\ #\"))) (: #\\ nonl)))) #\")) -(test-ssre ".*?" () (*? 
nonl)) -(test-ssre "\\b" () (or bow eow)) -(test-ssre "\\b" () (or bow eow)) -(test-ssre "a[^a]b" () (: #\a (~ #\a) #\b)) -(test-ssre "a.b" () (: #\a nonl #\b)) -(test-ssre "a[^a]b" () (: #\a (~ #\a) #\b)) -(test-ssre "a.b" () (: #\a nonl #\b)) -(test-ssre "^(b+?|a){1,2}?c" () (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) -(test-ssre "^(b+|a){1,2}?c" () (: bos (**? 1 2 ($ (or (+ #\b) #\a))) #\c)) -(test-ssre "(?!\\A)x" () (: (neg-look-ahead bos) #\x)) -(test-ssre "(A|B)*?CD" () (: (*? ($ (or #\A #\B))) #\C #\D)) -(test-ssre "(A|B)*CD" () (: (* ($ (or #\A #\B))) #\C #\D)) -(test-ssre "(?= 0 #\b) #\b #\c)) -(test-ssre "ab+bc" () (: #\a (+ #\b) #\b #\c)) -(test-ssre "ab{1,}bc" () (: #\a (>= 1 #\b) #\b #\c)) -(test-ssre "ab+bc" () (: #\a (+ #\b) #\b #\c)) -(test-ssre "ab{1,}bc" () (: #\a (>= 1 #\b) #\b #\c)) -(test-ssre "ab{1,3}bc" () (: #\a (** 1 3 #\b) #\b #\c)) -(test-ssre "ab{3,4}bc" () (: #\a (** 3 4 #\b) #\b #\c)) -(test-ssre "ab{4,5}bc" () (: #\a (** 4 5 #\b) #\b #\c)) -(test-ssre "ab?bc" () (: #\a (? #\b) #\b #\c)) -(test-ssre "ab{0,1}bc" () (: #\a (** 0 1 #\b) #\b #\c)) -(test-ssre "ab?bc" () (: #\a (? #\b) #\b #\c)) -(test-ssre "ab?c" () (: #\a (? 
#\b) #\c)) -(test-ssre "ab{0,1}c" () (: #\a (** 0 1 #\b) #\c)) -(test-ssre "^abc$" () (: bos #\a #\b #\c eos)) -(test-ssre "^abc" () (: bos #\a #\b #\c)) -(test-ssre "^abc$" () (: bos #\a #\b #\c eos)) -(test-ssre "abc$" () (: #\a #\b #\c eos)) -(test-ssre "^" () bos) -(test-ssre "$" () eos) -(test-ssre "a.c" () (: #\a nonl #\c)) -(test-ssre "a.*c" () (: #\a (* nonl) #\c)) -(test-ssre "a[bc]d" () (: #\a (or #\b #\c) #\d)) -(test-ssre "a[b-d]e" () (: #\a (char-range #\b #\d) #\e)) -(test-ssre "a[b-d]" () (: #\a (char-range #\b #\d))) -(test-ssre "a[-b]" () (: #\a (or #\- #\b))) -(test-ssre "a[b-]" () (: #\a (or #\b #\-))) -(test-ssre "a\\]" () (: #\a #\])) -(test-ssre "a[]]b" () (: #\a #\] #\b)) -(test-ssre "a[^bc]d" () (: #\a (~ (or #\b #\c)) #\d)) -(test-ssre "a[^-b]c" () (: #\a (~ (or #\- #\b)) #\c)) -(test-ssre "a[^]b]c" () (: #\a (~ (or #\] #\b)) #\c)) -(test-ssre "\\ba\\b" () (: (or bow eow) #\a (or bow eow))) -(test-ssre "\\by\\b" () (: (or bow eow) #\y (or bow eow))) -(test-ssre "\\Ba\\B" () (: nwb #\a nwb)) -(test-ssre "\\By\\b" () (: nwb #\y (or bow eow))) -(test-ssre "\\by\\B" () (: (or bow eow) #\y nwb)) -(test-ssre "\\By\\B" () (: nwb #\y nwb)) -(test-ssre "\\w" () (or alnum #\_)) -(test-ssre "\\W" () (~ (or alnum #\_))) -(test-ssre "a\\sb" () (: #\a space #\b)) -(test-ssre "a\\Sb" () (: #\a (~ space) #\b)) -(test-ssre "\\d" () numeric) -(test-ssre "\\D" () (~ numeric)) -(test-ssre "ab|cd" () (or (: #\a #\b) (: #\c #\d))) -(test-ssre "()ef" () (: ($ (:)) #\e #\f)) -(test-ssre "$b" () (: eos #\b)) -(test-ssre "a\\(b" () (: #\a #\( #\b)) -(test-ssre "a\\(*b" () (: #\a (* #\() #\b)) -(test-ssre "a\\\\b" () (: #\a #\\ #\b)) -(test-ssre "((a))" () ($ ($ #\a))) -(test-ssre "(a)b(c)" () (: ($ #\a) #\b ($ #\c))) -(test-ssre "a+b+c" () (: (+ #\a) (+ #\b) #\c)) -(test-ssre "a{1,}b{1,}c" () (: (>= 1 #\a) (>= 1 #\b) #\c)) -(test-ssre "a.+?c" () (: #\a (**? 
1 #f nonl) #\c)) -(test-ssre "(a+|b)*" () (* ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b){0,}" () (>= 0 ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b)+" () (+ ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b){1,}" () (>= 1 ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b)?" () (? ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b){0,1}" () (** 0 1 ($ (or (+ #\a) #\b)))) -(test-ssre "[^ab]*" () (* (~ (or #\a #\b)))) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "a*" () (* #\a)) -(test-ssre "([abc])*d" () (: (* ($ (or #\a #\b #\c))) #\d)) -(test-ssre "([abc])*bcd" () (: (* ($ (or #\a #\b #\c))) #\b #\c #\d)) -(test-ssre "a|b|c|d|e" () (or #\a #\b #\c #\d #\e)) -(test-ssre "(a|b|c|d|e)f" () (: ($ (or #\a #\b #\c #\d #\e)) #\f)) -(test-ssre "abcd*efg" () (: #\a #\b #\c (* #\d) #\e #\f #\g)) -(test-ssre "ab*" () (: #\a (* #\b))) -(test-ssre "(ab|cd)e" () (: ($ (or (: #\a #\b) (: #\c #\d))) #\e)) -(test-ssre "[abhgefdc]ij" () (: (or #\a #\b #\h #\g #\e #\f #\d #\c) #\i #\j)) -(test-ssre "^(ab|cd)e" () (: bos ($ (or (: #\a #\b) (: #\c #\d))) #\e)) -(test-ssre "(abc|)ef" () (: ($ (or (: #\a #\b #\c) (:))) #\e #\f)) -(test-ssre "(a|b)c*d" () (: ($ (or #\a #\b)) (* #\c) #\d)) -(test-ssre "(ab|ab*)bc" () (: ($ (or (: #\a #\b) (: #\a (* #\b)))) #\b #\c)) -(test-ssre "a([bc]*)c*" () (: #\a ($ (* (or #\b #\c))) (* #\c))) -(test-ssre "a([bc]*)(c*d)" () (: #\a ($ (* (or #\b #\c))) ($ (: (* #\c) #\d)))) -(test-ssre "a([bc]+)(c*d)" () (: #\a ($ (+ (or #\b #\c))) ($ (: (* #\c) #\d)))) -(test-ssre "a([bc]*)(c+d)" () (: #\a ($ (* (or #\b #\c))) ($ (: (+ #\c) #\d)))) -(test-ssre "a[bcd]*dcdcde" () (: #\a (* (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) -(test-ssre "a[bcd]+dcdcde" () (: #\a (+ (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) -(test-ssre "(ab|a)b*c" () (: ($ (or (: #\a #\b) #\a)) (* #\b) #\c)) -(test-ssre "((a)(b)c)(d)" () (: ($ (: ($ #\a) ($ #\b) #\c)) ($ #\d))) -(test-ssre "[a-zA-Z_][a-zA-Z0-9_]*" () (: (or (char-range #\a #\z) (char-range #\A #\Z) #\_) (* (or (char-range #\a #\z) (char-range #\A #\Z) 
(char-range #\0 #\9) #\_)))) -(test-ssre "^a(bc+|b[eh])g|.h$" () (or (: bos #\a ($ (or (: #\b (+ #\c)) (: #\b (or #\e #\h)))) #\g) (: nonl #\h eos))) -(test-ssre "(bc+d$|ef*g.|h?i(j|k))" () ($ (or (: #\b (+ #\c) #\d eos) (: #\e (* #\f) #\g nonl) (: (? #\h) #\i ($ (or #\j #\k)))))) -(test-ssre "((((((((((a))))))))))" () ($ ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a))))))))))) -(test-ssre "(((((((((a)))))))))" () ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a)))))))))) -(test-ssre "multiple words of text" () (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s #\space #\o #\f #\space #\t #\e #\x #\t)) -(test-ssre "multiple words" () (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s)) -(test-ssre "(.*)c(.*)" () (: ($ (* nonl)) #\c ($ (* nonl)))) -(test-ssre "\\((.*), (.*)\\)" () (: #\( ($ (* nonl)) #\, #\space ($ (* nonl)) #\))) -(test-ssre "[k]" () #\k) -(test-ssre "abcd" () (: #\a #\b #\c #\d)) -(test-ssre "a(bc)d" () (: #\a ($ (: #\b #\c)) #\d)) -(test-ssre "a[-]?c" () (: #\a (? #\-) #\c)) -(test-ssre "a(?!b)." () (: #\a (neg-look-ahead #\b) nonl)) -(test-ssre "a(?=d)." () (: #\a (look-ahead #\d) nonl)) -(test-ssre "a(?=c|d)." () (: #\a (look-ahead (or #\c #\d)) nonl)) -(test-ssre "a(?:b|c|d)(.)" () (: #\a (or #\b #\c #\d) ($ nonl))) -(test-ssre "a(?:b|c|d)*(.)" () (: #\a (* (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d)+?(.)" () (: #\a (**? 1 #f (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d)+(.)" () (: #\a (+ (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){2}(.)" () (: #\a (= 2 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){4,5}(.)" () (: #\a (** 4 5 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){4,5}?(.)" () (: #\a (**? 4 5 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){6,7}(.)" () (: #\a (** 6 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){6,7}?(.)" () (: #\a (**? 6 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,6}(.)" () (: #\a (** 5 6 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,6}?(.)" () (: #\a (**? 
5 6 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,7}(.)" () (: #\a (** 5 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,7}?(.)" () (: #\a (**? 5 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|(c|e){1,2}?|d)+?(.)" () (: #\a (**? 1 #f (or #\b (**? 1 2 ($ (or #\c #\e))) #\d)) ($ nonl))) -(test-ssre "^(.+)?B" () (: bos (? ($ (+ nonl))) #\B)) -(test-ssre "^([^a-z])|(\\^)$" () (or (: bos ($ (~ (char-range #\a #\z)))) (: ($ #\^) eos))) -(test-ssre "^[<>]&" () (: bos (or #\< #\>) #\&)) -(test-ssre "(?<=a)b" () (: (look-behind #\a) #\b)) -(test-ssre "(?a+)ab" () (: ($ (: #\> (+ #\a))) #\a #\b)) -(test-ssre "b\\z" () (: #\b eos)) -(test-ssre "(?<=\\d{3}(?!999))foo" () (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)))) #\f #\o #\o)) -(test-ssre "(?<=(?!...999)\\d{3})foo" () (: (look-behind (: (neg-look-ahead (: nonl nonl nonl #\9 #\9 #\9)) (= 3 numeric))) #\f #\o #\o)) -(test-ssre "(?<=\\d{3}(?!999)...)foo" () (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)) nonl nonl nonl)) #\f #\o #\o)) -(test-ssre "(?<=\\d{3}...)(?= 0 #\b) (or alnum #\_))) -(test-ssre "a*\\d*\\w" () (: (* #\a) (* numeric) (or alnum #\_))) -(test-ssre "a*b *\\w" (x) (: (* #\a) (* #\b) (or alnum #\_))) -(test-ssre "a* b *\\w" (x) (: (* #\a) (* #\b) (or alnum #\_))) -(test-ssre "\\z(?= 2 ($ (** 2 3 #\a)))) #\a)) -(test-ssre "(?=C)" () (look-ahead #\C)) -(test-ssre "(?:a(? (?')|(?\")) |b(? (?')|(?\")) ) (\\k[a-z]+|[0-9]+)" () (: (or (: #\a (-> quote (or (: #\space (-> apostrophe #\')) (-> realquote #\"))) #\space) (: #\b (-> quote (or (: #\space (-> apostrophe #\')) (-> realquote #\"))) #\space)) #\space ($ (or (: (backref quote) (+ (char-range #\a #\z))) (+ (char-range #\0 #\9)))))) -(test-ssre "^(a){2,}+(\\w)" () (: bos (+ (>= 2 ($ #\a))) ($ (or alnum #\_)))) -(test-ssre "^(?:a){2,}+(\\w)" () (: bos (+ (>= 2 #\a)) ($ (or alnum #\_)))) -(test-ssre "\\A.*?(a|bc)" () (: bos (*? nonl) ($ (or #\a (: #\b #\c))))) -(test-ssre "\\A.*?(?:a|bc|d)" () (: bos (*? 
nonl) (or #\a (: #\b #\c) #\d))) -(test-ssre "(?:.*?a)(?<=ba)" () (: (*? nonl) #\a (look-behind (: #\b #\a)))) -(test-ssre "a(?=bc).|abd" () (or (: #\a (look-ahead (: #\b #\c)) nonl) (: #\a #\b #\d))) -(test-ssre "\\A.*?(?:a|bc)" () (: bos (*? nonl) (or #\a (: #\b #\c)))) -(test-ssre "^\\d*\\w{4}" () (: bos (* numeric) (= 4 (or alnum #\_)))) -(test-ssre "^[^b]*\\w{4}" () (: bos (* (~ #\b)) (= 4 (or alnum #\_)))) -(test-ssre "^a*\\w{4}" () (: bos (* #\a) (= 4 (or alnum #\_)))) -(test-ssre "(?:(?foo)|(?bar))\\k" () (: (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n))) -(test-ssre "(?A)(?:(?foo)|(?bar))\\k" () (: (-> n #\A) (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n))) -(test-ssre "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$" () (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos)) -(test-ssre "(?:x|(?:(xx|yy)+|x|x|x|x|x)|a|a|a)bc" () (: (or #\x (or (+ ($ (or (: #\x #\x) (: #\y #\y)))) #\x #\x #\x #\x #\x) #\a #\a #\a) #\b #\c)) -(test-ssre "\\sabc" () (: space #\a #\b #\c)) -(test-ssre "Z*(|d*){216}" () (: (* #\Z) (= 216 ($ (or (:) (* #\d)))))) -(test-ssre "(?<=a(B){0}c)X" () (: (look-behind (: #\a (= 0 ($ #\B)) #\c)) #\X)) -(test-ssre "a+(?:|b)a" () (: (+ #\a) (or (:) #\b) #\a)) -(test-ssre "X?(R||){3335}" () (: (? #\X) (= 3335 ($ (or #\R (:) (:)))))) -(test-ssre "(?!(b))c|b" () (or (: (neg-look-ahead ($ #\b)) #\c) #\b)) -(test-ssre "(?=(b))b|c" () (or (: (look-ahead ($ #\b)) #\b) #\c)) -(test-ssre "<(?x:[a b])>" () (: #\< (or #\a #\space #\b) #\>)) -(test-ssre "<(?:[a b])>" () (: #\< (or #\a #\space #\b) #\>)) -(test-ssre "<(?xxx:[a b])>" () (: #\< (or #\a #\space #\b) #\>)) -(test-ssre "<(?-x:[a b])>" () (: #\< (or #\a #\space #\b) #\>)) -(test-ssre "[[:digit:]-]+" () (+ (or numeric #\-))) -(test-ssre "(?<=(?=.)?)" () (look-behind (? 
(look-ahead nonl)))) -(test-ssre "(?<=(?=.){4,5})" () (look-behind (** 4 5 (look-ahead nonl)))) -(test-ssre "(?<=(?=.){4,5}x)" () (look-behind (: (** 4 5 (look-ahead nonl)) #\x))) -(test-ssre " (? \\w+ )* \\. " () (: #\space #\space #\space (* (-> word (: #\space (+ (or alnum #\_)) #\space))) #\space #\space #\space #\space #\. #\space #\space #\space)) -(test-ssre "(?<=(?=.(?<=x)))" () (look-behind (look-ahead (: nonl (look-behind #\x))))) -(test-ssre "(?<=(?=(?<=a)))b" () (: (look-behind (look-ahead (look-behind #\a))) #\b)) -(test-ssre "(?<=ab?c)..." () (: (look-behind (: #\a (? #\b) #\c)) nonl nonl nonl)) -(test-ssre "(?<=PQR|ab?c)..." () (: (look-behind (or (: #\P #\Q #\R) (: #\a (? #\b) #\c))) nonl nonl nonl)) -(test-ssre "(?<=ab?c|PQR)..." () (: (look-behind (or (: #\a (? #\b) #\c) (: #\P #\Q #\R))) nonl nonl nonl)) -(test-ssre "(?<=PQ|ab?c)..." () (: (look-behind (or (: #\P #\Q) (: #\a (? #\b) #\c))) nonl nonl nonl)) -(test-ssre "(?<=ab?c|PQ)..." () (: (look-behind (or (: #\a (? #\b) #\c) (: #\P #\Q))) nonl nonl nonl)) -(test-ssre "(?<=a(b?c|d?e?e)f)X." () (: (look-behind (: #\a ($ (or (: (? #\b) #\c) (: (? #\d) (? #\e) #\e))) #\f)) #\X nonl)) -(test-ssre "(?= 5 (char-range #\a #\z)) #\b) #\x)) -(test-ssre "[a-z]{1,6}?s|x" () (or (: (**? 
1 6 (char-range #\a #\z)) #\s) #\x)) -(test-ssre "[@]" () #\@) -(test-ssre "@" () #\@) -(test-ssre "@@@xxx" () (: #\@ #\@ #\@ #\x #\x #\x)) -(test-ssre "badutf" () (: #\b #\a #\d #\u #\t #\f)) -(test-ssre "badutf" () (: #\b #\a #\d #\u #\t #\f)) -(test-ssre "shortutf" () (: #\s #\h #\o #\r #\t #\u #\t #\f)) -(test-ssre "anything" () (: #\a #\n #\y #\t #\h #\i #\n #\g)) -(test-ssre "badutf" () (: #\b #\a #\d #\u #\t #\f)) -(test-ssre "(?<=x)badutf" () (: (look-behind #\x) #\b #\a #\d #\u #\t #\f)) -(test-ssre "(?<=xx)badutf" () (: (look-behind (: #\x #\x)) #\b #\a #\d #\u #\t #\f)) -(test-ssre "(?<=xxxx)badutf" () (: (look-behind (: #\x #\x #\x #\x)) #\b #\a #\d #\u #\t #\f)) -(test-ssre "X" () #\X) -(test-ssre "a+" () (+ #\a)) -(test-ssre "A" () #\A) -(test-ssre "x" () #\x) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "X" () #\X) -(test-ssre "(?<=.)X" () (: (look-behind nonl) #\X)) -(test-ssre "a+" () (+ #\a)) -(test-ssre "a" () #\a) -(test-ssre "." () nonl) -(test-ssre "s" () #\s) -(test-ssre "[^s]" () (~ #\s)) -(test-ssre "a(?:.)*?a" () (: #\a (*? nonl) #\a)) -(test-ssre "(?<=pqr)abc(?=xyz)" () (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z)))) -(test-ssre "a\\b" () (: #\a (or bow eow))) -(test-ssre "abc(?=abcde)(?=ab)" () (: #\a #\b #\c (look-ahead (: #\a #\b #\c #\d #\e)) (look-ahead (: #\a #\b)))) -(test-ssre "(?<=abc)123" () (: (look-behind (: #\a #\b #\c)) #\1 #\2 #\3)) -(test-ssre "\\babc\\b" () (: (or bow eow) #\a #\b #\c (or bow eow))) -(test-ssre "(?<=abc)def" () (: (look-behind (: #\a #\b #\c)) #\d #\e #\f)) -(test-ssre "abc(?<=bc)def" () (: #\a #\b #\c (look-behind (: #\b #\c)) #\d #\e #\f)) -(test-ssre "(?<=ab)cdef" () (: (look-behind (: #\a #\b)) #\c #\d #\e #\f)) -(test-ssre "b(?tom|bon)-\\k" () (: (-> A (or (: #\t #\o #\m) (: #\b #\o #\n))) #\- (backref A))) -(test-ssre "Xa{2,4}b" () (: #\X (** 2 4 #\a) #\b)) -(test-ssre "Xa{2,4}?b" () (: #\X (**? 
2 4 #\a) #\b)) -(test-ssre "Xa{2,4}+b" () (: #\X (+ (** 2 4 #\a)) #\b)) -(test-ssre "X\\d{2,4}b" () (: #\X (** 2 4 numeric) #\b)) -(test-ssre "X\\d{2,4}?b" () (: #\X (**? 2 4 numeric) #\b)) -(test-ssre "X\\d{2,4}+b" () (: #\X (+ (** 2 4 numeric)) #\b)) -(test-ssre "X\\D{2,4}b" () (: #\X (** 2 4 (~ numeric)) #\b)) -(test-ssre "X\\D{2,4}?b" () (: #\X (**? 2 4 (~ numeric)) #\b)) -(test-ssre "X\\D{2,4}+b" () (: #\X (+ (** 2 4 (~ numeric))) #\b)) -(test-ssre "X[abc]{2,4}b" () (: #\X (** 2 4 (or #\a #\b #\c)) #\b)) -(test-ssre "X[abc]{2,4}?b" () (: #\X (**? 2 4 (or #\a #\b #\c)) #\b)) -(test-ssre "X[abc]{2,4}+b" () (: #\X (+ (** 2 4 (or #\a #\b #\c))) #\b)) -(test-ssre "X[^a]{2,4}b" () (: #\X (** 2 4 (~ #\a)) #\b)) -(test-ssre "X[^a]{2,4}?b" () (: #\X (**? 2 4 (~ #\a)) #\b)) -(test-ssre "X[^a]{2,4}+b" () (: #\X (+ (** 2 4 (~ #\a))) #\b)) -(test-ssre "Z(?!)" () (: #\Z (neg-look-ahead (:)))) -(test-ssre "dog(sbody)?" () (: #\d #\o #\g (? ($ (: #\s #\b #\o #\d #\y))))) -(test-ssre "dog(sbody)??" () (: #\d #\o #\g (?? ($ (: #\s #\b #\o #\d #\y))))) -(test-ssre "dog|dogsbody" () (or (: #\d #\o #\g) (: #\d #\o #\g #\s #\b #\o #\d #\y))) -(test-ssre "dogsbody|dog" () (or (: #\d #\o #\g #\s #\b #\o #\d #\y) (: #\d #\o #\g))) -(test-ssre "\\bthe cat\\b" () (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow))) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "(?<=abc)123" () (: (look-behind (: #\a #\b #\c)) #\1 #\2 #\3)) -(test-ssre "\\babc\\b" () (: (or bow eow) #\a #\b #\c (or bow eow))) -(test-ssre "a?b?" () (: (? #\a) (? #\b))) -(test-ssre "^a?b?" () (: bos (? #\a) (? 
#\b))) -(test-ssre "abcd*" () (: #\a #\b #\c (* #\d))) -(test-ssre "abc\\d*" () (: #\a #\b #\c (* numeric))) -(test-ssre "abc[de]*" () (: #\a #\b #\c (* (or #\d #\e)))) -(test-ssre "(?<=abc)def" () (: (look-behind (: #\a #\b #\c)) #\d #\e #\f)) -(test-ssre "abc$" () (: #\a #\b #\c eos)) -(test-ssre "abc$" () (: #\a #\b #\c eos)) -(test-ssre "abc\\z" () (: #\a #\b #\c eos)) -(test-ssre "abc\\b" () (: #\a #\b #\c (or bow eow))) -(test-ssre "abc\\B" () (: #\a #\b #\c nwb)) -(test-ssre ".+" () (+ nonl)) -(test-ssre "(?<=(abc)+)X" () (: (look-behind (+ ($ (: #\a #\b #\c)))) #\X)) -(test-ssre "(a)b|ac" () (or (: ($ #\a) #\b) (: #\a #\c))) -(test-ssre "(a)(b)x|abc" () (or (: ($ #\a) ($ #\b) #\x) (: #\a #\b #\c))) -(test-ssre "(?:(foo)|(bar)|(baz))X" () (: (or ($ (: #\f #\o #\o)) ($ (: #\b #\a #\r)) ($ (: #\b #\a #\z))) #\X)) -(test-ssre "(ab)x|ab" () (or (: ($ (: #\a #\b)) #\x) (: #\a #\b))) -(test-ssre "(((((a)))))" () ($ ($ ($ ($ ($ #\a)))))) -(test-ssre "a*?b*?" () (: (*? #\a) (*? #\b))) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "a(b)c" () (: #\a ($ #\b) #\c)) -(test-ssre "(a)(b)|(c)" () (or (: ($ #\a) ($ #\b)) ($ #\c))) -(test-ssre "(?a)|(?b)" () (or (-> A #\a) (-> A #\b))) -(test-ssre "a(b)c(d)" () (: #\a ($ #\b) #\c ($ #\d))) -(test-ssre "^abc" () (: bos #\a #\b #\c)) -(test-ssre ".*\\d" () (: (* nonl) numeric)) -(test-ssre "(abc)*" () (* ($ (: #\a #\b #\c)))) -(test-ssre "^" () bos) -(test-ssre "(?:ab)?(?:ab)(?:ab)" () (: (? 
(: #\a #\b)) (: #\a #\b) (: #\a #\b))) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "(abcd)" () ($ (: #\a #\b #\c #\d))) -(test-ssre "abcd" () (: #\a #\b #\c #\d)) -(test-ssre "a(b)c" () (: #\a ($ #\b) #\c)) -(test-ssre "a[[:punct:]b]" () (: #\a (or punct #\b))) -(test-ssre "a[b[:punct:]]" () (: #\a (or #\b punct))) -(test-ssre "0b 28 3f 2d 78 29 3a" () (: #\0 #\b #\space #\2 #\8 #\space #\3 #\f #\space #\2 #\d #\space #\7 #\8 #\space #\2 #\9 #\space #\3 #\a)) -(test-ssre "a|(b)c" () (or #\a (: ($ #\b) #\c))) -(test-ssre "efg" () (: #\e #\f #\g)) -(test-ssre "eff" () (: #\e #\f #\f)) -(test-ssre "effg" () (: #\e #\f #\f #\g)) -(test-ssre "aaa" () (: #\a #\a #\a)) -(test-ssre "(?)" () (: #\[ ($ (:)) (= 65535 #\]) (-> A (:)))) -(test-ssre "(?<=(?=.(?<=x)))" () (look-behind (look-ahead (: nonl (look-behind #\x))))) -(test-ssre "\\z" () eos) -(test-ssre "\\Z" () (: (? #\newline) eos)) -(test-ssre "(?![ab]).*" () (: (neg-look-ahead (or #\a #\b)) (* nonl))) -(test-ssre "abcd" () (: #\a #\b #\c #\d)) -(test-ssre "12345(?<=\\d{1,256})X" () (: #\1 #\2 #\3 #\4 #\5 (look-behind (** 1 256 numeric)) #\X)) -(test-ssre "(?foo)|(?bar))\\k" () (: (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n))) -(test-ssre "a?b[]xy]*c" () (: (? #\a) #\b (* (or #\] #\x #\y)) #\c)) -(test-ssre "f*" () (* #\f)) -(test-ssre "foo\\*" () (: #\f #\o #\o #\*)) -(test-ssre "foo\\*bar" () (: #\f #\o #\o #\* #\b #\a #\r)) -(test-ssre "f\\\\oo" () (: #\f #\\ #\o #\o)) -(test-ssre "[ten]" () (or #\t #\e #\n)) -(test-ssre "t[a-g]n" () (: #\t (char-range #\a #\g) #\n)) -(test-ssre "a[]]b" () (: #\a #\] #\b)) -(test-ssre "a[]a-]b" () (: #\a (or #\] #\a #\-) #\b)) -(test-ssre "a[]-]b" () (: #\a (or #\] #\-) #\b)) -(test-ssre "a[]a-z]b" () (: #\a (or #\] (char-range #\a #\z)) #\b)) -(test-ssre "\\]" () #\]) -(test-ssre "t[!a-g]n" () (: #\t (or #\! 
(char-range #\a #\g)) #\n)) -(test-ssre "A[+-0]B" () (: #\A (char-range #\+ #\0) #\B)) -(test-ssre "a[--0]z" () (: #\a (char-range #\- #\0) #\z)) -(test-ssre "a[[:digit:].]z" () (: #\a (or numeric #\.) #\z)) -(test-ssre "A\\B\\\\C\\D" () (: #\A nwb #\\ #\C (~ numeric))) -(test-ssre "a*b" () (: (* #\a) #\b)) -(test-ssre "<[]bc]>" () (: #\< (or #\] #\b #\c) #\>)) -(test-ssre "<[^]bc]>" () (: #\< (~ (or #\] #\b #\c)) #\>)) -(test-ssre "a*b+c\\+[def](ab)\\(cd\\)" () (: (* #\a) (+ #\b) #\c #\+ (or #\d #\e #\f) ($ (: #\a #\b)) #\( #\c #\d #\))) -(test-ssre "how.to how\\.to" () (: #\h #\o #\w nonl #\t #\o #\space #\h #\o #\w #\. #\t #\o)) -(test-ssre "^how to \\^how to" () (: bos #\h #\o #\w #\space #\t #\o #\space #\^ #\h #\o #\w #\space #\t #\o)) -(test-ssre "^b\\(c^d\\)\\(^e^f\\)" () (: bos #\b #\( #\c bos #\d #\) #\( bos #\e bos #\f #\))) -(test-ssre "\\[()\\]{65535}()" () (: #\[ ($ (:)) (= 65535 #\]) ($ (:)))) -(test-ssre "^A" () (: bos #\A)) -(test-ssre "^\\w+" () (: bos (+ (or alnum #\_)))) -(test-ssre "(.+)\\b(.+)" () (: ($ (+ nonl)) (or bow eow) ($ (+ nonl)))) -(test-ssre "\\W+" () (+ (~ (or alnum #\_)))) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "a.b" () (: #\a nonl #\b)) -(test-ssre "a(.{3})b" () (: #\a ($ (= 3 nonl)) #\b)) -(test-ssre "a(.*?)(.)" () (: #\a ($ (*? nonl)) ($ nonl))) -(test-ssre "a(.*?)(.)" () (: #\a ($ (*? nonl)) ($ nonl))) -(test-ssre "a(.*)(.)" () (: #\a ($ (* nonl)) ($ nonl))) -(test-ssre "a(.*)(.)" () (: #\a ($ (* nonl)) ($ nonl))) -(test-ssre "a(.)(.)" () (: #\a ($ nonl) ($ nonl))) -(test-ssre "a(.)(.)" () (: #\a ($ nonl) ($ nonl))) -(test-ssre "a(.?)(.)" () (: #\a ($ (? nonl)) ($ nonl))) -(test-ssre "a(.?)(.)" () (: #\a ($ (? nonl)) ($ nonl))) -(test-ssre "a(.??)(.)" () (: #\a ($ (?? nonl)) ($ nonl))) -(test-ssre "a(.??)(.)" () (: #\a ($ (?? nonl)) ($ nonl))) -(test-ssre "a(.{3})b" () (: #\a ($ (= 3 nonl)) #\b)) -(test-ssre "a(.{3,})b" () (: #\a ($ (>= 3 nonl)) #\b)) -(test-ssre "a(.{3,}?)b" () (: #\a ($ (**? 
3 #f nonl)) #\b)) -(test-ssre "a(.{3,5})b" () (: #\a ($ (** 3 5 nonl)) #\b)) -(test-ssre "a(.{3,5}?)b" () (: #\a ($ (**? 3 5 nonl)) #\b)) -(test-ssre "(?<=aXb)cd" () (: (look-behind (: #\a #\X #\b)) #\c #\d)) -(test-ssre "(?<=(.))X" () (: (look-behind ($ nonl)) #\X)) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "^[^a]{2}" () (: bos (= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}" () (: bos (>= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}?" () (: bos (**? 2 #f (~ #\a)))) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "^[^a]{2}" () (: bos (= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}" () (: bos (>= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}?" () (: bos (**? 2 #f (~ #\a)))) -(test-ssre "\\D*" () (* (~ numeric))) -(test-ssre "\\D*" () (* (~ numeric))) -(test-ssre "\\D" () (~ numeric)) -(test-ssre ">\\S" () (: #\> (~ space))) -(test-ssre "\\d" () numeric) -(test-ssre "\\s" () space) -(test-ssre "\\D+" () (+ (~ numeric))) -(test-ssre "\\D{2,3}" () (** 2 3 (~ numeric))) -(test-ssre "\\D{2,3}?" () (**? 2 3 (~ numeric))) -(test-ssre "\\d+" () (+ numeric)) -(test-ssre "\\d{2,3}" () (** 2 3 numeric)) -(test-ssre "\\d{2,3}?" () (**? 2 3 numeric)) -(test-ssre "\\S+" () (+ (~ space))) -(test-ssre "\\S{2,3}" () (** 2 3 (~ space))) -(test-ssre "\\S{2,3}?" () (**? 2 3 (~ space))) -(test-ssre ">\\s+<" () (: #\> (+ space) #\<)) -(test-ssre ">\\s{2,3}<" () (: #\> (** 2 3 space) #\<)) -(test-ssre ">\\s{2,3}?<" () (: #\> (**? 2 3 space) #\<)) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\w{2,3}" () (** 2 3 (or alnum #\_))) -(test-ssre "\\w{2,3}?" () (**? 2 3 (or alnum #\_))) -(test-ssre "\\W+" () (+ (~ (or alnum #\_)))) -(test-ssre "\\W{2,3}" () (** 2 3 (~ (or alnum #\_)))) -(test-ssre "\\W{2,3}?" () (**? 
2 3 (~ (or alnum #\_)))) -(test-ssre "^[ac]*b" () (: bos (* (or #\a #\c)) #\b)) -(test-ssre "^[^x]*b" () (: bos (* (~ #\x)) #\b)) -(test-ssre "^[^x]*b" () (: bos (* (~ #\x)) #\b)) -(test-ssre "^\\d*b" () (: bos (* numeric) #\b)) -(test-ssre "(|a)" () ($ (or (:) #\a))) -(test-ssre "\\S\\S" () (: (~ space) (~ space))) -(test-ssre "\\S{2}" () (= 2 (~ space))) -(test-ssre "\\W\\W" () (: (~ (or alnum #\_)) (~ (or alnum #\_)))) -(test-ssre "\\W{2}" () (= 2 (~ (or alnum #\_)))) -(test-ssre "\\S" () (~ space)) -(test-ssre "\\D" () (~ numeric)) -(test-ssre "\\W" () (~ (or alnum #\_))) -(test-ssre ".[^\\S\n]." () (: nonl (~ (or (~ space) #\newline)) nonl)) -(test-ssre "^[^d]*?$" () (: bos (*? (~ #\d)) eos)) -(test-ssre "^[^d]*?$" () (: bos (*? (~ #\d)) eos)) -(test-ssre "^[^d]*?$" () (: bos (*? (~ #\d)) eos)) -(test-ssre "A*" () (* #\A)) -(test-ssre "." () nonl) -(test-ssre "^\\d*\\w{4}" () (: bos (* numeric) (= 4 (or alnum #\_)))) -(test-ssre "^[^b]*\\w{4}" () (: bos (* (~ #\b)) (= 4 (or alnum #\_)))) -(test-ssre "^[^b]*\\w{4}" () (: bos (* (~ #\b)) (= 4 (or alnum #\_)))) -(test-ssre "^.\\B.\\B." () (: bos nonl nwb nonl nwb nonl)) -(test-ssre "\\D+" () (+ (~ numeric))) -(test-ssre "^\\w+" () (: bos (+ (or alnum #\_)))) -(test-ssre "^\\d+" () (: bos (+ numeric))) -(test-ssre "^>\\s+" () (: bos #\> (+ space))) -(test-ssre "^A\\s+Z" () (: bos #\A (+ space) #\Z)) -(test-ssre "[RST]+" () (+ (or #\R #\S #\T))) -(test-ssre "[R-T]+" () (+ (char-range #\R #\T))) -(test-ssre "[q-u]+" () (+ (char-range #\q #\u))) -(test-ssre "^s?c" () (: bos (? #\s) #\c)) -(test-ssre "[A-`]" () (char-range #\A #\`)) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\b.+?\\b" () (: (or bow eow) (**? 1 #f nonl) (or bow eow))) -(test-ssre "caf\\B.+?\\B" () (: #\c #\a #\f nwb (**? 
1 #f nonl) nwb)) -(test-ssre "c3 b1" () (: #\c #\3 #\space #\b #\1)) -(test-ssre "^A\\s+Z" () (: bos #\A (+ space) #\Z)) -(test-ssre "\\W" () (~ (or alnum #\_))) -(test-ssre "\\w" () (or alnum #\_)) -(test-ssre "Xa{2,4}b" () (: #\X (** 2 4 #\a) #\b)) -(test-ssre "Xa{2,4}?b" () (: #\X (**? 2 4 #\a) #\b)) -(test-ssre "Xa{2,4}+b" () (: #\X (+ (** 2 4 #\a)) #\b)) -(test-ssre "X\\d{2,4}b" () (: #\X (** 2 4 numeric) #\b)) -(test-ssre "X\\d{2,4}?b" () (: #\X (**? 2 4 numeric) #\b)) -(test-ssre "X\\d{2,4}+b" () (: #\X (+ (** 2 4 numeric)) #\b)) -(test-ssre "X\\D{2,4}b" () (: #\X (** 2 4 (~ numeric)) #\b)) -(test-ssre "X\\D{2,4}?b" () (: #\X (**? 2 4 (~ numeric)) #\b)) -(test-ssre "X\\D{2,4}+b" () (: #\X (+ (** 2 4 (~ numeric))) #\b)) -(test-ssre "X\\D{2,4}b" () (: #\X (** 2 4 (~ numeric)) #\b)) -(test-ssre "X\\D{2,4}?b" () (: #\X (**? 2 4 (~ numeric)) #\b)) -(test-ssre "X\\D{2,4}+b" () (: #\X (+ (** 2 4 (~ numeric))) #\b)) -(test-ssre "X[abc]{2,4}b" () (: #\X (** 2 4 (or #\a #\b #\c)) #\b)) -(test-ssre "X[abc]{2,4}?b" () (: #\X (**? 2 4 (or #\a #\b #\c)) #\b)) -(test-ssre "X[abc]{2,4}+b" () (: #\X (+ (** 2 4 (or #\a #\b #\c))) #\b)) -(test-ssre "X[^a]{2,4}b" () (: #\X (** 2 4 (~ #\a)) #\b)) -(test-ssre "X[^a]{2,4}?b" () (: #\X (**? 2 4 (~ #\a)) #\b)) -(test-ssre "X[^a]{2,4}+b" () (: #\X (+ (** 2 4 (~ #\a))) #\b)) -(test-ssre "X[^a]{2,4}b" () (: #\X (** 2 4 (~ #\a)) #\b)) -(test-ssre "X[^a]{2,4}?b" () (: #\X (**? 
2 4 (~ #\a)) #\b)) -(test-ssre "X[^a]{2,4}+b" () (: #\X (+ (** 2 4 (~ #\a))) #\b)) -(test-ssre "\\bthe cat\\b" () (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow))) -(test-ssre "abcd*" () (: #\a #\b #\c (* #\d))) -(test-ssre "abcd*" () (: #\a #\b #\c (* #\d))) -(test-ssre "abc\\d*" () (: #\a #\b #\c (* numeric))) -(test-ssre "abc[de]*" () (: #\a #\b #\c (* (or #\d #\e)))) -(test-ssre "X\\W{3}X" () (: #\X (= 3 (~ (or alnum #\_))) #\X)) -(test-ssre "f.*" () (: #\f (* nonl))) -(test-ssre "f.*" () (: #\f (* nonl))) -(test-ssre "f.*" () (: #\f (* nonl))) -(test-ssre "f.*" () (: #\f (* nonl))) -(test-ssre "(?ss)|(?kk)) \\k" () (: (or (-> A (: #\s #\s)) (-> A (: #\k #\k))) #\space (backref A))) -(test-ssre "(?:(?s)|(?k)) \\k{3,}!" () (: (or (-> A #\s) (-> A #\k)) #\space (>= 3 (backref A)) #\!)) -(test-ssre "i" () #\i) -(test-ssre "I" () #\I) -(test-ssre "[i]" () #\i) -(test-ssre "[^i]" () (~ #\i)) -(test-ssre "[zi]" () (or #\z #\i)) -(test-ssre "[iI]" () (or #\i #\I)) -(test-ssre "\\d+" () (+ numeric)) -(test-ssre "\\d+" () (+ numeric)) -(test-ssre ">\\s+<" () (: #\> (+ space) #\<)) -(test-ssre ">\\s+<" () (: #\> (+ space) #\<)) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\bABC\\b" () (: (or bow eow) #\A #\B #\C (or bow eow))) -(test-ssre "\\bABC\\b" () (: (or bow eow) #\A #\B #\C (or bow eow))) -(test-ssre "(?= 2 #\b) #\x (** 4 5 #\y) #\p (** 0 6 #\q) #\A (>= 0 #\B) #\z #\z)) -(test-ssre "^(abc){1,2}zz" () (: bos (** 1 2 ($ (: #\a #\b #\c))) #\z #\z)) -(test-ssre "^(b+?|a){1,2}?c" () (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) -(test-ssre "^(b+|a){1,2}c" () (: bos (** 1 2 ($ (or (+ #\b) #\a))) #\c)) -(test-ssre "^(b+|a){1,2}?bc" () (: bos (**? 1 2 ($ (or (+ #\b) #\a))) #\b #\c)) -(test-ssre "^(b*|ba){1,2}?bc" () (: bos (**? 1 2 ($ (or (* #\b) (: #\b #\a)))) #\b #\c)) -(test-ssre "^(ba|b*){1,2}?bc" () (: bos (**? 
1 2 ($ (or (: #\b #\a) (* #\b)))) #\b #\c)) -(test-ssre "^[ab\\]cde]" () (: bos (or #\a #\b #\] #\c #\d #\e))) -(test-ssre "^[]cde]" () (: bos (or #\] #\c #\d #\e))) -(test-ssre "^[^ab\\]cde]" () (: bos (~ (or #\a #\b #\] #\c #\d #\e)))) -(test-ssre "^[^]cde]" () (: bos (~ (or #\] #\c #\d #\e)))) -(test-ssre "^@" () (: bos #\@)) -(test-ssre "^[0-9]+$" () (: bos (+ (char-range #\0 #\9)) eos)) -(test-ssre "^.*nter" () (: bos (* nonl) #\n #\t #\e #\r)) -(test-ssre "^xxx[0-9]+$" () (: bos #\x #\x #\x (+ (char-range #\0 #\9)) eos)) -(test-ssre "^.+[0-9][0-9][0-9]$" () (: bos (+ nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) -(test-ssre "^.+?[0-9][0-9][0-9]$" () (: bos (**? 1 #f nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos)) -(test-ssre "^([^!]+)!(.+)=apquxz\\.ixr\\.zzz\\.ac\\.uk$" () (: bos ($ (+ (~ #\!))) #\! ($ (+ nonl)) #\= #\a #\p #\q #\u #\x #\z #\. #\i #\x #\r #\. #\z #\z #\z #\. #\a #\c #\. #\u #\k eos)) -(test-ssre ":" () #\:) -(test-ssre "^.*\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$" () (: bos (* nonl) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) eos)) -(test-ssre "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$" () (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos)) -(test-ssre "^[a-zA-Z\\d][a-zA-Z\\d\\-]*(\\.[a-zA-Z\\d][a-zA-Z\\d\\-]*)*\\.$" () (: bos (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-)) (* ($ (: #\. (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-))))) #\. eos)) -(test-ssre "^\\*\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?(\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?)*$" () (: bos #\* #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric))))) (* ($ (: #\. (char-range #\a #\z) (? 
($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric)))))))) eos)) -(test-ssre "^(?=ab(de))(abd)(e)" () (: bos (look-ahead (: #\a #\b ($ (: #\d #\e)))) ($ (: #\a #\b #\d)) ($ #\e))) -(test-ssre "^(?!(ab)de|x)(abd)(f)" () (: bos (neg-look-ahead (or (: ($ (: #\a #\b)) #\d #\e) #\x)) ($ (: #\a #\b #\d)) ($ #\f))) -(test-ssre "^(?=(ab(cd)))(ab)" () (: bos (look-ahead ($ (: #\a #\b ($ (: #\c #\d))))) ($ (: #\a #\b)))) -(test-ssre "^$" () (: bos eos)) -(test-ssre "^ a\\ b[c ]d $" (x) (: bos #\a #\space #\b (or #\c #\space) #\d eos)) -(test-ssre "^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$" () (: bos ($ (: #\a ($ (: #\b ($ #\c))))) ($ (: #\d ($ (: #\e ($ #\f))))) ($ (: #\h ($ (: #\i ($ #\j))))) ($ (: #\k ($ (: #\l ($ #\m))))) eos)) -(test-ssre "^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$" () (: bos (: #\a ($ (: #\b ($ #\c)))) (: #\d ($ (: #\e ($ #\f)))) (: #\h ($ (: #\i ($ #\j)))) (: #\k ($ (: #\l ($ #\m)))) eos)) -(test-ssre "^[.^$|()*+?{,}]+" () (: bos (+ (or #\. #\^ #\$ #\| #\( #\) #\* #\+ #\? #\{ #\, #\})))) -(test-ssre "^a*\\w" () (: bos (* #\a) (or alnum #\_))) -(test-ssre "^a*?\\w" () (: bos (*? #\a) (or alnum #\_))) -(test-ssre "^a+\\w" () (: bos (+ #\a) (or alnum #\_))) -(test-ssre "^a+?\\w" () (: bos (**? 1 #f #\a) (or alnum #\_))) -(test-ssre "^\\d{8}\\w{2,}" () (: bos (= 8 numeric) (>= 2 (or alnum #\_)))) -(test-ssre "^[aeiou\\d]{4,5}$" () (: bos (** 4 5 (or #\a #\e #\i #\o #\u numeric)) eos)) -(test-ssre "^[aeiou\\d]{4,5}?" () (: bos (**? 
4 5 (or #\a #\e #\i #\o #\u numeric)))) -(test-ssre "^12.34" () (: bos #\1 #\2 nonl #\3 #\4)) -(test-ssre "foo(?!bar)(.*)" () (: #\f #\o #\o (neg-look-ahead (: #\b #\a #\r)) ($ (* nonl)))) -(test-ssre "(?:(?!foo)...|^.{0,2})bar(.*)" () (: (or (: (neg-look-ahead (: #\f #\o #\o)) nonl nonl nonl) (: bos (** 0 2 nonl))) #\b #\a #\r ($ (* nonl)))) -(test-ssre "^(\\D*)(?=\\d)(?!123)" () (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) -(test-ssre "(?!^)abc" () (: (neg-look-ahead bos) #\a #\b #\c)) -(test-ssre "(?=^)abc" () (: (look-ahead bos) #\a #\b #\c)) -(test-ssre "ab{1,3}bc" () (: #\a (** 1 3 #\b) #\b #\c)) -(test-ssre "([^.]*)\\.([^:]*):[T ]+(.*)" () (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* nonl)))) -(test-ssre "^[W-c]+$" () (: bos (+ (char-range #\W #\c)) eos)) -(test-ssre "^abc$" () (: bos #\a #\b #\c eos)) -(test-ssre "^abc$" () (: bos #\a #\b #\c eos)) -(test-ssre "\\Aabc\\Z" () (: bos #\a #\b #\c (: (? #\newline) eos))) -(test-ssre "\\A(.)*\\Z" () (: bos (* ($ nonl)) (: (? #\newline) eos))) -(test-ssre "(?:b)|(?::+)" () (or #\b (+ #\:))) -(test-ssre "[-az]+" () (+ (or #\- #\a #\z))) -(test-ssre "[az-]+" () (+ (or #\a #\z #\-))) -(test-ssre "[a\\-z]+" () (+ (or #\a #\- #\z))) -(test-ssre "[a-z]+" () (+ (char-range #\a #\z))) -(test-ssre "[\\d-]+" () (+ (or numeric #\-))) -(test-ssre "abc$" () (: #\a #\b #\c eos)) -(test-ssre "a{0}bc" () (: (= 0 #\a) #\b #\c)) -(test-ssre "(a|(bc)){0,0}?xyz" () (: (**? 0 0 ($ (or #\a ($ (: #\b #\c))))) #\x #\y #\z)) -(test-ssre "[^a]" () (~ #\a)) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "[^k]$" () (: (~ #\k) eos)) -(test-ssre "[^k]{2,3}$" () (: (** 2 3 (~ #\k)) eos)) -(test-ssre "^\\d{8,}@.+[^k]$" () (: bos (>= 8 numeric) #\@ (+ nonl) (~ #\k) eos)) -(test-ssre "[^a]" () (~ #\a)) -(test-ssre "[^az]" () (~ (or #\a #\z))) -(test-ssre "P[^*]TAIRE[^*]{1,6}?LL" () (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 
1 6 (~ #\*)) #\L #\L)) -(test-ssre "P[^*]TAIRE[^*]{1,}?LL" () (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 #f (~ #\*)) #\L #\L)) -(test-ssre "(\\.\\d\\d[1-9]?)\\d+" () (: ($ (: #\. numeric numeric (? (char-range #\1 #\9)))) (+ numeric))) -(test-ssre "(\\.\\d\\d((?=0)|\\d(?=\\d)))" () ($ (: #\. numeric numeric ($ (or (look-ahead #\0) (: numeric (look-ahead numeric))))))) -(test-ssre "foo(.*)bar" () (: #\f #\o #\o ($ (* nonl)) #\b #\a #\r)) -(test-ssre "foo(.*?)bar" () (: #\f #\o #\o ($ (*? nonl)) #\b #\a #\r)) -(test-ssre "(.*)(\\d+)" () (: ($ (* nonl)) ($ (+ numeric)))) -(test-ssre "(.*?)(\\d+)" () (: ($ (*? nonl)) ($ (+ numeric)))) -(test-ssre "(.*)(\\d+)$" () (: ($ (* nonl)) ($ (+ numeric)) eos)) -(test-ssre "(.*?)(\\d+)$" () (: ($ (*? nonl)) ($ (+ numeric)) eos)) -(test-ssre "(.*)\\b(\\d+)$" () (: ($ (* nonl)) (or bow eow) ($ (+ numeric)) eos)) -(test-ssre "(.*\\D)(\\d+)$" () (: ($ (: (* nonl) (~ numeric))) ($ (+ numeric)) eos)) -(test-ssre "^\\D*(?!123)" () (: bos (* (~ numeric)) (neg-look-ahead (: #\1 #\2 #\3)))) -(test-ssre "^(\\D*)(?=\\d)(?!123)" () (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3)))) -(test-ssre "^[W-\\]46]" () (: bos (or (char-range #\W #\]) #\4 #\6))) -(test-ssre "word (?:[a-zA-Z0-9]+ ){0,10}otherword" () (: #\w #\o #\r #\d #\space (** 0 10 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) -(test-ssre "word (?:[a-zA-Z0-9]+ ){0,300}otherword" () (: #\w #\o #\r #\d #\space (** 0 300 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d)) -(test-ssre "^(a){0,0}" () (: bos (= 0 ($ #\a)))) -(test-ssre "^(a){0,1}" () (: bos (** 0 1 ($ #\a)))) -(test-ssre "^(a){0,2}" () (: bos (** 0 2 ($ #\a)))) -(test-ssre "^(a){0,3}" () (: bos (** 0 3 ($ #\a)))) -(test-ssre "^(a){0,}" () (: bos (>= 0 ($ #\a)))) -(test-ssre "^(a){1,1}" () (: bos (= 1 ($ #\a)))) -(test-ssre "^(a){1,2}" () (: bos 
(** 1 2 ($ #\a)))) -(test-ssre "^(a){1,3}" () (: bos (** 1 3 ($ #\a)))) -(test-ssre "^(a){1,}" () (: bos (>= 1 ($ #\a)))) -(test-ssre ".*\\.gif" () (: (* nonl) #\. #\g #\i #\f)) -(test-ssre ".{0,}\\.gif" () (: (>= 0 nonl) #\. #\g #\i #\f)) -(test-ssre ".*\\.gif" () (: (* nonl) #\. #\g #\i #\f)) -(test-ssre ".*\\.gif" () (: (* nonl) #\. #\g #\i #\f)) -(test-ssre ".*\\.gif" () (: (* nonl) #\. #\g #\i #\f)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre ".*$" () (: (* nonl) eos)) -(test-ssre "(.*X|^B)" () ($ (or (: (* nonl) #\X) (: bos #\B)))) -(test-ssre "(.*X|^B)" () ($ (or (: (* nonl) #\X) (: bos #\B)))) -(test-ssre "(.*X|^B)" () ($ (or (: (* nonl) #\X) (: bos #\B)))) -(test-ssre "(.*X|^B)" () ($ (or (: (* nonl) #\X) (: bos #\B)))) -(test-ssre "^.*B" () (: bos (* nonl) #\B)) -(test-ssre "(?m)^.*B" () (: bol (* nonl) #\B)) -(test-ssre "^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" () (: bos (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9))) -(test-ssre "^\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d" () (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) -(test-ssre "^[\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d][\\d]" () (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric)) -(test-ssre "^[abc]{12}" () (: bos (= 12 (or #\a #\b #\c)))) -(test-ssre "^[a-c]{12}" () (: bos (= 12 (char-range #\a #\c)))) -(test-ssre "^(a|b|c){12}" () (: bos (= 12 ($ (or #\a #\b #\c))))) -(test-ssre 
"^[abcdefghijklmnopqrstuvwxy0123456789]" () (: bos (or #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))) -(test-ssre "abcde{0,0}" () (: #\a #\b #\c #\d (= 0 #\e))) -(test-ssre "ab[cd]{0,0}e" () (: #\a #\b (= 0 (or #\c #\d)) #\e)) -(test-ssre "ab(c){0,0}d" () (: #\a #\b (= 0 ($ #\c)) #\d)) -(test-ssre "a(b*)" () (: #\a ($ (* #\b)))) -(test-ssre "ab\\d{0}e" () (: #\a #\b (= 0 numeric) #\e)) -(test-ssre "\"([^\\\\\"]+|\\\\.)*\"" () (: #\" (* ($ (or (+ (~ (or #\\ #\"))) (: #\\ nonl)))) #\")) -(test-ssre ".*?" () (*? nonl)) -(test-ssre "\\b" () (or bow eow)) -(test-ssre "\\b" () (or bow eow)) -(test-ssre "a[^a]b" () (: #\a (~ #\a) #\b)) -(test-ssre "a.b" () (: #\a nonl #\b)) -(test-ssre "a[^a]b" () (: #\a (~ #\a) #\b)) -(test-ssre "a.b" () (: #\a nonl #\b)) -(test-ssre "^(b+?|a){1,2}?c" () (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c)) -(test-ssre "^(b+|a){1,2}?c" () (: bos (**? 1 2 ($ (or (+ #\b) #\a))) #\c)) -(test-ssre "(?!\\A)x" () (: (neg-look-ahead bos) #\x)) -(test-ssre "(A|B)*CD" () (: (* ($ (or #\A #\B))) #\C #\D)) -(test-ssre "(?= 0 #\b) #\b #\c)) -(test-ssre "ab+bc" () (: #\a (+ #\b) #\b #\c)) -(test-ssre "ab+bc" () (: #\a (+ #\b) #\b #\c)) -(test-ssre "ab{1,}bc" () (: #\a (>= 1 #\b) #\b #\c)) -(test-ssre "ab{1,3}bc" () (: #\a (** 1 3 #\b) #\b #\c)) -(test-ssre "ab{3,4}bc" () (: #\a (** 3 4 #\b) #\b #\c)) -(test-ssre "ab{4,5}bc" () (: #\a (** 4 5 #\b) #\b #\c)) -(test-ssre "ab?bc" () (: #\a (? #\b) #\b #\c)) -(test-ssre "ab{0,1}bc" () (: #\a (** 0 1 #\b) #\b #\c)) -(test-ssre "ab?bc" () (: #\a (? #\b) #\b #\c)) -(test-ssre "ab?c" () (: #\a (? 
#\b) #\c)) -(test-ssre "ab{0,1}c" () (: #\a (** 0 1 #\b) #\c)) -(test-ssre "^abc$" () (: bos #\a #\b #\c eos)) -(test-ssre "^abc" () (: bos #\a #\b #\c)) -(test-ssre "^abc$" () (: bos #\a #\b #\c eos)) -(test-ssre "abc$" () (: #\a #\b #\c eos)) -(test-ssre "^" () bos) -(test-ssre "$" () eos) -(test-ssre "a.c" () (: #\a nonl #\c)) -(test-ssre "a.*c" () (: #\a (* nonl) #\c)) -(test-ssre "a[bc]d" () (: #\a (or #\b #\c) #\d)) -(test-ssre "a[b-d]e" () (: #\a (char-range #\b #\d) #\e)) -(test-ssre "a[b-d]" () (: #\a (char-range #\b #\d))) -(test-ssre "a[-b]" () (: #\a (or #\- #\b))) -(test-ssre "a[b-]" () (: #\a (or #\b #\-))) -(test-ssre "a[]]b" () (: #\a #\] #\b)) -(test-ssre "a[^bc]d" () (: #\a (~ (or #\b #\c)) #\d)) -(test-ssre "a[^-b]c" () (: #\a (~ (or #\- #\b)) #\c)) -(test-ssre "a[^]b]c" () (: #\a (~ (or #\] #\b)) #\c)) -(test-ssre "\\ba\\b" () (: (or bow eow) #\a (or bow eow))) -(test-ssre "\\by\\b" () (: (or bow eow) #\y (or bow eow))) -(test-ssre "\\Ba\\B" () (: nwb #\a nwb)) -(test-ssre "\\By\\b" () (: nwb #\y (or bow eow))) -(test-ssre "\\by\\B" () (: (or bow eow) #\y nwb)) -(test-ssre "\\By\\B" () (: nwb #\y nwb)) -(test-ssre "\\w" () (or alnum #\_)) -(test-ssre "\\W" () (~ (or alnum #\_))) -(test-ssre "a\\sb" () (: #\a space #\b)) -(test-ssre "a\\Sb" () (: #\a (~ space) #\b)) -(test-ssre "\\d" () numeric) -(test-ssre "\\D" () (~ numeric)) -(test-ssre "ab|cd" () (or (: #\a #\b) (: #\c #\d))) -(test-ssre "()ef" () (: ($ (:)) #\e #\f)) -(test-ssre "$b" () (: eos #\b)) -(test-ssre "a\\(b" () (: #\a #\( #\b)) -(test-ssre "a\\(*b" () (: #\a (* #\() #\b)) -(test-ssre "a\\\\b" () (: #\a #\\ #\b)) -(test-ssre "((a))" () ($ ($ #\a))) -(test-ssre "(a)b(c)" () (: ($ #\a) #\b ($ #\c))) -(test-ssre "a+b+c" () (: (+ #\a) (+ #\b) #\c)) -(test-ssre "a{1,}b{1,}c" () (: (>= 1 #\a) (>= 1 #\b) #\c)) -(test-ssre "a.+?c" () (: #\a (**? 
1 #f nonl) #\c)) -(test-ssre "(a+|b)*" () (* ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b){0,}" () (>= 0 ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b)+" () (+ ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b){1,}" () (>= 1 ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b)?" () (? ($ (or (+ #\a) #\b)))) -(test-ssre "(a+|b){0,1}" () (** 0 1 ($ (or (+ #\a) #\b)))) -(test-ssre "[^ab]*" () (* (~ (or #\a #\b)))) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "a*" () (* #\a)) -(test-ssre "([abc])*d" () (: (* ($ (or #\a #\b #\c))) #\d)) -(test-ssre "([abc])*bcd" () (: (* ($ (or #\a #\b #\c))) #\b #\c #\d)) -(test-ssre "a|b|c|d|e" () (or #\a #\b #\c #\d #\e)) -(test-ssre "(a|b|c|d|e)f" () (: ($ (or #\a #\b #\c #\d #\e)) #\f)) -(test-ssre "abcd*efg" () (: #\a #\b #\c (* #\d) #\e #\f #\g)) -(test-ssre "ab*" () (: #\a (* #\b))) -(test-ssre "(ab|cd)e" () (: ($ (or (: #\a #\b) (: #\c #\d))) #\e)) -(test-ssre "[abhgefdc]ij" () (: (or #\a #\b #\h #\g #\e #\f #\d #\c) #\i #\j)) -(test-ssre "^(ab|cd)e" () (: bos ($ (or (: #\a #\b) (: #\c #\d))) #\e)) -(test-ssre "(abc|)ef" () (: ($ (or (: #\a #\b #\c) (:))) #\e #\f)) -(test-ssre "(a|b)c*d" () (: ($ (or #\a #\b)) (* #\c) #\d)) -(test-ssre "(ab|ab*)bc" () (: ($ (or (: #\a #\b) (: #\a (* #\b)))) #\b #\c)) -(test-ssre "a([bc]*)c*" () (: #\a ($ (* (or #\b #\c))) (* #\c))) -(test-ssre "a([bc]*)(c*d)" () (: #\a ($ (* (or #\b #\c))) ($ (: (* #\c) #\d)))) -(test-ssre "a([bc]+)(c*d)" () (: #\a ($ (+ (or #\b #\c))) ($ (: (* #\c) #\d)))) -(test-ssre "a([bc]*)(c+d)" () (: #\a ($ (* (or #\b #\c))) ($ (: (+ #\c) #\d)))) -(test-ssre "a[bcd]*dcdcde" () (: #\a (* (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) -(test-ssre "a[bcd]+dcdcde" () (: #\a (+ (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e)) -(test-ssre "(ab|a)b*c" () (: ($ (or (: #\a #\b) #\a)) (* #\b) #\c)) -(test-ssre "((a)(b)c)(d)" () (: ($ (: ($ #\a) ($ #\b) #\c)) ($ #\d))) -(test-ssre "[a-zA-Z_][a-zA-Z0-9_]*" () (: (or (char-range #\a #\z) (char-range #\A #\Z) #\_) (* (or (char-range #\a #\z) (char-range #\A #\Z) 
(char-range #\0 #\9) #\_)))) -(test-ssre "^a(bc+|b[eh])g|.h$" () (or (: bos #\a ($ (or (: #\b (+ #\c)) (: #\b (or #\e #\h)))) #\g) (: nonl #\h eos))) -(test-ssre "(bc+d$|ef*g.|h?i(j|k))" () ($ (or (: #\b (+ #\c) #\d eos) (: #\e (* #\f) #\g nonl) (: (? #\h) #\i ($ (or #\j #\k)))))) -(test-ssre "((((((((((a))))))))))" () ($ ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a))))))))))) -(test-ssre "(((((((((a)))))))))" () ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a)))))))))) -(test-ssre "multiple words of text" () (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s #\space #\o #\f #\space #\t #\e #\x #\t)) -(test-ssre "multiple words" () (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s)) -(test-ssre "(.*)c(.*)" () (: ($ (* nonl)) #\c ($ (* nonl)))) -(test-ssre "\\((.*), (.*)\\)" () (: #\( ($ (* nonl)) #\, #\space ($ (* nonl)) #\))) -(test-ssre "[k]" () #\k) -(test-ssre "abcd" () (: #\a #\b #\c #\d)) -(test-ssre "a(bc)d" () (: #\a ($ (: #\b #\c)) #\d)) -(test-ssre "a[-]?c" () (: #\a (? #\-) #\c)) -(test-ssre "a(?!b)." () (: #\a (neg-look-ahead #\b) nonl)) -(test-ssre "a(?=d)." () (: #\a (look-ahead #\d) nonl)) -(test-ssre "a(?=c|d)." () (: #\a (look-ahead (or #\c #\d)) nonl)) -(test-ssre "a(?:b|c|d)(.)" () (: #\a (or #\b #\c #\d) ($ nonl))) -(test-ssre "a(?:b|c|d)*(.)" () (: #\a (* (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d)+?(.)" () (: #\a (**? 1 #f (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d)+(.)" () (: #\a (+ (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){2}(.)" () (: #\a (= 2 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){4,5}(.)" () (: #\a (** 4 5 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){4,5}?(.)" () (: #\a (**? 4 5 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "((foo)|(bar))*" () (* ($ (or ($ (: #\f #\o #\o)) ($ (: #\b #\a #\r)))))) -(test-ssre "a(?:b|c|d){6,7}(.)" () (: #\a (** 6 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){6,7}?(.)" () (: #\a (**? 
6 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,6}(.)" () (: #\a (** 5 6 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,6}?(.)" () (: #\a (**? 5 6 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,7}(.)" () (: #\a (** 5 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|c|d){5,7}?(.)" () (: #\a (**? 5 7 (or #\b #\c #\d)) ($ nonl))) -(test-ssre "a(?:b|(c|e){1,2}?|d)+?(.)" () (: #\a (**? 1 #f (or #\b (**? 1 2 ($ (or #\c #\e))) #\d)) ($ nonl))) -(test-ssre "^(.+)?B" () (: bos (? ($ (+ nonl))) #\B)) -(test-ssre "^([^a-z])|(\\^)$" () (or (: bos ($ (~ (char-range #\a #\z)))) (: ($ #\^) eos))) -(test-ssre "^[<>]&" () (: bos (or #\< #\>) #\&)) -(test-ssre "(?:(f)(o)(o)|(b)(a)(r))*" () (* (or (: ($ #\f) ($ #\o) ($ #\o)) (: ($ #\b) ($ #\a) ($ #\r))))) -(test-ssre "(?<=a)b" () (: (look-behind #\a) #\b)) -(test-ssre "(?a+)ab" () (: ($ (: #\> (+ #\a))) #\a #\b)) -(test-ssre "a\\z" () (: #\a eos)) -(test-ssre "(?<=\\d{3}(?!999))foo" () (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)))) #\f #\o #\o)) -(test-ssre "(?<=(?!...999)\\d{3})foo" () (: (look-behind (: (neg-look-ahead (: nonl nonl nonl #\9 #\9 #\9)) (= 3 numeric))) #\f #\o #\o)) -(test-ssre "(?<=\\d{3}(?!999)...)foo" () (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)) nonl nonl nonl)) #\f #\o #\o)) -(test-ssre "(?<=\\d{3}...)(?= 2 (or #\a #\b))) -(test-ssre "[ab]{2,}?" () (**? 2 #f (or #\a #\b))) -(test-ssre "abc(?=xyz)" () (: #\a #\b #\c (look-ahead (: #\x #\y #\z)))) -(test-ssre "(?<=pqr)abc(?=xyz)" () (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z)))) -(test-ssre "a\\b" () (: #\a (or bow eow))) -(test-ssre "abc(?=abcde)(?=ab)" () (: #\a #\b #\c (look-ahead (: #\a #\b #\c #\d #\e)) (look-ahead (: #\a #\b)))) -(test-ssre "a*?b*?" () (: (*? #\a) (*? 
#\b))) -(test-ssre "(a)(b)|(c)" () (or (: ($ #\a) ($ #\b)) ($ #\c))) -(test-ssre "(?aa)" () (-> A (: #\a #\a))) -(test-ssre "a(b)c(d)" () (: #\a ($ #\b) #\c ($ #\d))) -(test-ssre "^" () bos) -(test-ssre "(02-)?[0-9]{3}-[0-9]{3}" () (: (? ($ (: #\0 #\2 #\-))) (= 3 (char-range #\0 #\9)) #\- (= 3 (char-range #\0 #\9)))) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "abc" () (: #\a #\b #\c)) -(test-ssre "abc|bcd" () (or (: #\a #\b #\c) (: #\b #\c #\d))) -(test-ssre "(?<=abc|)" () (look-behind (or (: #\a #\b #\c) (:)))) -(test-ssre "(?<=abc|)" () (look-behind (or (: #\a #\b #\c) (:)))) -(test-ssre "(?<=|abc)" () (look-behind (or (:) (: #\a #\b #\c)))) -(test-ssre "[abc]" () (or #\a #\b #\c)) -(test-ssre "foobar" () (: #\f #\o #\o #\b #\a #\r)) -(test-ssre "foobar" () (: #\f #\o #\o #\b #\a #\r)) -(test-ssre "(?<=pqr)abc(?=xyz)" () (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z)))) -(test-ssre "\\z" () eos) -(test-ssre "\\Z" () (: (? #\newline) eos)) -(test-ssre "(?<=(?=.(?<=x)))" () (look-behind (look-ahead (: nonl (look-behind #\x))))) -(test-ssre "(?![ab]).*" () (: (neg-look-ahead (or #\a #\b)) (* nonl))) -(test-ssre "[a[]" () (or #\a #\[)) -(test-ssre "\\bX" () (: (or bow eow) #\X)) -(test-ssre "\\BX" () (: nwb #\X)) -(test-ssre "X\\b" () (: #\X (or bow eow))) -(test-ssre "X\\B" () (: #\X nwb)) -(test-ssre "[^a]" () (~ #\a)) -(test-ssre "a.b" () (: #\a nonl #\b)) -(test-ssre "a(.{3})b" () (: #\a ($ (= 3 nonl)) #\b)) -(test-ssre "a(.*?)(.)" () (: #\a ($ (*? nonl)) ($ nonl))) -(test-ssre "a(.*?)(.)" () (: #\a ($ (*? nonl)) ($ nonl))) -(test-ssre "a(.*)(.)" () (: #\a ($ (* nonl)) ($ nonl))) -(test-ssre "a(.*)(.)" () (: #\a ($ (* nonl)) ($ nonl))) -(test-ssre "a(.)(.)" () (: #\a ($ nonl) ($ nonl))) -(test-ssre "a(.)(.)" () (: #\a ($ nonl) ($ nonl))) -(test-ssre "a(.?)(.)" () (: #\a ($ (? nonl)) ($ nonl))) -(test-ssre "a(.?)(.)" () (: #\a ($ (? nonl)) ($ nonl))) -(test-ssre "a(.??)(.)" () (: #\a ($ (?? 
nonl)) ($ nonl))) -(test-ssre "a(.??)(.)" () (: #\a ($ (?? nonl)) ($ nonl))) -(test-ssre "a(.{3})b" () (: #\a ($ (= 3 nonl)) #\b)) -(test-ssre "a(.{3,})b" () (: #\a ($ (>= 3 nonl)) #\b)) -(test-ssre "a(.{3,}?)b" () (: #\a ($ (**? 3 #f nonl)) #\b)) -(test-ssre "a(.{3,5})b" () (: #\a ($ (** 3 5 nonl)) #\b)) -(test-ssre "a(.{3,5}?)b" () (: #\a ($ (**? 3 5 nonl)) #\b)) -(test-ssre "(?<=aXb)cd" () (: (look-behind (: #\a #\X #\b)) #\c #\d)) -(test-ssre "(?<=(.))X" () (: (look-behind ($ nonl)) #\X)) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "^[^a]{2}" () (: bos (= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}" () (: bos (>= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}?" () (: bos (**? 2 #f (~ #\a)))) -(test-ssre "[^a]+" () (+ (~ #\a))) -(test-ssre "^[^a]{2}" () (: bos (= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}" () (: bos (>= 2 (~ #\a)))) -(test-ssre "^[^a]{2,}?" () (: bos (**? 2 #f (~ #\a)))) -(test-ssre "\\D" () (~ numeric)) -(test-ssre ">\\S" () (: #\> (~ space))) -(test-ssre "\\d" () numeric) -(test-ssre "\\s" () space) -(test-ssre "\\D+" () (+ (~ numeric))) -(test-ssre "\\D{2,3}" () (** 2 3 (~ numeric))) -(test-ssre "\\D{2,3}?" () (**? 2 3 (~ numeric))) -(test-ssre "\\d+" () (+ numeric)) -(test-ssre "\\d{2,3}" () (** 2 3 numeric)) -(test-ssre "\\d{2,3}?" () (**? 2 3 numeric)) -(test-ssre "\\S+" () (+ (~ space))) -(test-ssre "\\S{2,3}" () (** 2 3 (~ space))) -(test-ssre "\\S{2,3}?" () (**? 2 3 (~ space))) -(test-ssre ">\\s+<" () (: #\> (+ space) #\<)) -(test-ssre ">\\s{2,3}<" () (: #\> (** 2 3 space) #\<)) -(test-ssre ">\\s{2,3}?<" () (: #\> (**? 2 3 space) #\<)) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\w{2,3}" () (** 2 3 (or alnum #\_))) -(test-ssre "\\w{2,3}?" () (**? 2 3 (or alnum #\_))) -(test-ssre "\\W+" () (+ (~ (or alnum #\_)))) -(test-ssre "\\W{2,3}" () (** 2 3 (~ (or alnum #\_)))) -(test-ssre "\\W{2,3}?" () (**? 
2 3 (~ (or alnum #\_)))) -(test-ssre "^[ac]*b" () (: bos (* (or #\a #\c)) #\b)) -(test-ssre "^[^x]*b" () (: bos (* (~ #\x)) #\b)) -(test-ssre "^[^x]*b" () (: bos (* (~ #\x)) #\b)) -(test-ssre "^\\d*b" () (: bos (* numeric) #\b)) -(test-ssre "(|a)" () ($ (or (:) #\a))) -(test-ssre "abcd*" () (: #\a #\b #\c (* #\d))) -(test-ssre "abcd*" () (: #\a #\b #\c (* #\d))) -(test-ssre "abc\\d*" () (: #\a #\b #\c (* numeric))) -(test-ssre "abc[de]*" () (: #\a #\b #\c (* (or #\d #\e)))) -(test-ssre "\\bthe cat\\b" () (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow))) -(test-ssre "[\\p{Nd}]" () numeric) -(test-ssre "[\\p{Nd}+-]+" () (+ (or numeric #\+ #\-))) -(test-ssre "[\\P{Nd}]+" () (+ (~ numeric))) -(test-ssre "^[\\p{Vowel}]" () (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) -(test-ssre "^[\\p{Any}]X" () (: bos any #\X)) -(test-ssre "^[\\P{Any}]X" () (: bos (~ any) #\X)) -(test-ssre "^[\\p{Any}]?X" () (: bos (? any) #\X)) -(test-ssre "[.\\p{Lu}][.\\p{Ll}][.\\P{Lu}][.\\P{Ll}]" () (: (or #\. upper) (or #\. lower) (or #\. (~ upper)) (or #\. (~ lower)))) -(test-ssre "[\\p{L}]" () alpha) -(test-ssre "[\\P{L}]" () (~ alpha)) -(test-ssre "[\\pLu]" () (or alpha #\u)) -(test-ssre "[\\PLu]" () (or (~ alpha) #\u)) -(test-ssre "\\p{Nd}" () numeric) -(test-ssre "\\p{Nd}+" () (+ numeric)) -(test-ssre "\\P{Nd}+" () (+ (~ numeric))) -(test-ssre "^\\p{Vowel}" () (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) -(test-ssre "^\\p{Any}X" () (: bos any #\X)) -(test-ssre "^\\P{Any}X" () (: bos (~ any) #\X)) -(test-ssre "^\\p{Any}?X" () (: bos (? 
any) #\X)) -(test-ssre "\\p{L}" () alpha) -(test-ssre "\\P{L}" () (~ alpha)) -(test-ssre "\\pLu" () (: alpha #\u)) -(test-ssre "\\PLu" () (: (~ alpha) #\u)) -(test-ssre "\\b...\\B" () (: (or bow eow) nonl nonl nonl nwb)) -(test-ssre "\\b...\\B" () (: (or bow eow) nonl nonl nonl nwb)) -(test-ssre "\\b...\\B" () (: (or bow eow) nonl nonl nonl nwb)) -(test-ssre "ist" () (: #\i #\s #\t)) -(test-ssre "is+t" () (: #\i (+ #\s) #\t)) -(test-ssre "is+?t" () (: #\i (**? 1 #f #\s) #\t)) -(test-ssre "is?t" () (: #\i (? #\s) #\t)) -(test-ssre "is{2}t" () (: #\i (= 2 #\s) #\t)) -(test-ssre "^A\\s+Z" () (: bos #\A (+ space) #\Z)) -(test-ssre "AskZ" () (: #\A #\s #\k #\Z)) -(test-ssre "[AskZ]+" () (+ (or #\A #\s #\k #\Z))) -(test-ssre "[^s]+" () (+ (~ #\s))) -(test-ssre "[^s]+" () (+ (~ #\s))) -(test-ssre "[^k]+" () (+ (~ #\k))) -(test-ssre "[^k]+" () (+ (~ #\k))) -(test-ssre "[^sk]+" () (+ (~ (or #\s #\k)))) -(test-ssre "[^sk]+" () (+ (~ (or #\s #\k)))) -(test-ssre "i" () #\i) -(test-ssre "I" () #\I) -(test-ssre "[i]" () #\i) -(test-ssre "[zi]" () (or #\z #\i)) -(test-ssre "[iI]" () (or #\i #\I)) -(test-ssre "\\d+" () (+ numeric)) -(test-ssre "\\d+" () (+ numeric)) -(test-ssre ">\\s+<" () (: #\> (+ space) #\<)) -(test-ssre ">\\s+<" () (: #\> (+ space) #\<)) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\w+" () (+ (or alnum #\_))) -(test-ssre "\\b.+?\\b" () (: (or bow eow) (**? 1 #f nonl) (or bow eow))) -(test-ssre "caf\\B.+?\\B" () (: #\c #\a #\f nwb (**? 1 #f nonl) nwb)) -(test-ssre "x{1,3}+" () (+ (** 1 3 #\x))) -(test-ssre "[a]" () #\a) -(test-ssre "[^a]" () (~ #\a)) -(test-ssre "(?<=C\n)^" () (: (look-behind (: #\C #\newline)) bos)) -(test-ssre "\\w+(?=\t)" () (: (+ (or alnum #\_)) (look-ahead #\tab))) +; options tests -;; new set notation tests +(test-ssre "(?i)A string" (w/nocase (: #\A #\space #\s #\t #\r #\i #\n #\g))) +(test-ssre "(?i)([^.]*)\\.([^:]*):[T ]+(.*)" (w/nocase (: ($ (* (~ #\.))) #\. 
($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* nonl))))) +(test-ssre "(?i)^[W-c]+$" (w/nocase (: bos (+ (char-range #\W #\c)) eos))) +(test-ssre "(?s)\\A(.)*\\z" (: bos (* ($ any)) eos)) +(test-ssre "(?i)[^a]" (w/nocase (~ #\a))) +(test-ssre "(?i:saturday|sunday)" (w/nocase (or (: #\s #\a #\t #\u #\r #\d #\a #\y) (: #\s #\u #\n #\d #\a #\y)))) +(test-ssre "(?i)(?i:a)b" (w/nocase (: #\a #\b))) +(test-ssre "(?i)((?i:a))b" (w/nocase (: ($ #\a) #\b))) +(test-ssre "(?i)(?-i:a)b" (w/nocase (: (w/case #\a) #\b))) +(test-ssre "(?i)((?-i:a))b" (w/nocase (: ($ (w/case #\a)) #\b))) +(test-ssre "(?i)(?-i:a)b" (w/nocase (: (w/case #\a) #\b))) +(test-ssre "((?-i:a))b" (: ($ #\a) #\b)) +(test-ssre "(?-i:a)b" (: #\a #\b)) +(test-ssre "((?-i:a))b" (: ($ #\a) #\b)) +(test-ssre "(?is)((?-i:a.))b" (w/nocase (: ($ (w/case (: #\a any))) #\b))) +;(test-ssre "^a(?#xxx){3}c" (: bos "a" "{3}c")) -- (?#comments) not supported +(test-ssre "(?m)^b$" (: bol #\b eol)) +(test-ssre "(?ms)^b." (: bol #\b any)) +(test-ssre "(?i)([\\w:]+::)?(\\w+)$" (w/nocase (: (? ($ (: (+ (or alnum #\_ #\:)) #\: #\:))) ($ (+ (or alnum #\_))) eos))) +(test-ssre "(?x)x y z | a b c" (or (: #\x #\y #\z) (: #\a #\b #\c))) +(test-ssre "(?i)AB(?-i:C)" (w/nocase (: #\A #\B (w/case #\C)))) +(test-ssre "(?i)reg(?:ul(?:[a@]|ae)r|ex)" (w/nocase (: #\r #\e #\g (or (: #\u #\l (or #\a #\@ (: #\a #\e)) #\r) (: #\e #\x))))) +(test-ssre "ab cd (?x: de fg)" (: #\a #\b #\space #\c #\d #\space (: #\d #\e #\f #\g))) +(test-ssre "ab cd(?x: de fg) h" (: #\a #\b #\space #\c #\d (: #\d #\e #\f #\g) #\space #\h)) +(test-ssre "(?s)^\\w+=.*(\\\\\n.*)*" (: bos (+ (or alnum #\_)) #\= (* any) (* ($ (: #\\ #\newline (* any)))))) +(test-ssre "(?i)[^a]*" (w/nocase (* (~ #\a)))) +(test-ssre "(?i)[^a]*?X" (w/nocase (: (*? (~ #\a)) #\X))) +(test-ssre "(?i)[^a]+?X" (w/nocase (: (**? 1 #f (~ #\a)) #\X))) +(test-ssre "(?i)[^a]?X" (w/nocase (: (? (~ #\a)) #\X))) +(test-ssre "(?i)[^a]??X" (w/nocase (: (?? 
(~ #\a)) #\X))) +(test-ssre "(?i)[^a]{2,3}" (w/nocase (** 2 3 (~ #\a)))) +(test-ssre "(?i)[^a]{2,3}?" (w/nocase (**? 2 3 (~ #\a)))) +(test-ssre "(?i)(?<=a{2})b" (w/nocase (: (look-behind (= 2 #\a)) #\b))) +(test-ssre "(?i)(?= 8 (* (or (~ alpha) #\a #\*)))) +(test-ssre "(?i)abc" (w/nocase (: #\a #\b #\c))) +(test-ssre "(?i)(?-i)the end" (w/nocase (w/case (: #\t #\h #\e #\space #\e #\n #\d)))) ; optimise? +(test-ssre "(?i)([\\da-f:]+)$" (w/nocase (: ($ (+ (or numeric (char-range #\a #\f) #\:))) eos))) +(test-ssre "(?i)^[\\da-f](\\.[\\da-f])*$" (w/nocase (: bos (or numeric (char-range #\a #\f)) (* ($ (: #\. (or numeric (char-range #\a #\f))))) eos))) +(test-ssre "(?is)([^.]*)\\.([^:]*):[T ]+(.*)" (w/nocase (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* any))))) +(test-ssre "(?isn)([^.]*)\\.([^:]*):[T ]+(.*)" (w/nocase (: (* (~ #\.)) #\. (* (~ #\:)) #\: (+ (or #\T #\space)) (* any)))) +(test-ssre "(?i)^[W-c]+$" (w/nocase (: bos (+ (char-range #\W #\c)) eos))) +(test-ssre "(?i)^[\x3f;-\x5F;]+$" (w/nocase (: bos (+ (char-range #\? 
#\_)) eos))) +(test-ssre "(?i)[^a]" (w/nocase (~ #\a))) +(test-ssre "(?i)[^a]+" (w/nocase (+ (~ #\a)))) +(test-ssre "(?i)[^az]" (w/nocase (~ (or #\a #\z)))) +(test-ssre "(?i)\\b(foo)\\s+(\\w+)" (w/nocase (: (or bow eow) ($ (: #\f #\o #\o)) (+ space) ($ (+ (or alnum #\_)))))) +(test-ssre "a(?i:b)c" (: #\a (w/nocase #\b) #\c)) +(test-ssre "a(?i:b)*c" (: #\a (* (w/nocase #\b)) #\c)) +(test-ssre "(?im)^(?-u:\\w\\s*\\w)$" (w/nocase (: bol (w/ascii (: (or alnum #\_) (* space) (or alnum #\_))) eol))) +(test-ssre "(?i)abc\\X*" (w/nocase (: #\a #\b #\c (* grapheme)))) +(test-ssre "(?n)((((((((((a))))))))))" #\a) +(test-ssre "(?n)((((((((?-n:(a)))))))))" ($ #\a)) +(test-ssre "(?n)((((((((?-n:(a)|(a)))))))))" (or ($ #\a) ($ #\a))) -(test-ssre "{Nd}" () numeric) -(test-ssre "{Nd|[+]|[-]}+" () (+ (or numeric #\+ #\-))) -(test-ssre "{~Nd}+" () (+ (~ numeric))) -(test-ssre "^{Vowel}" () (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) -(test-ssre "^{Any}X" () (: bos any #\X)) -(test-ssre "^{~Any}X" () (: bos (~ any) #\X)) -(test-ssre "^{Any}?X" () (: bos (? any) #\X)) -(test-ssre "{[.]|Lu}{[.]|Ll}{[.]|~Lu}{[.]|~Ll}" () (: (or #\. upper) (or #\. lower) (or #\. (~ upper)) (or #\. (~ lower)))) -(test-ssre "{L}" () alpha) -(test-ssre "{~L}" () (~ alpha)) -(test-ssre "{L}u" () (: alpha #\u)) -(test-ssre "{~L}u" () (: (~ alpha) #\u)) -(test-ssre "{L-Vowel}u" () (: (- alpha (or #\A #\E #\I #\O #\U #\Y #\W)) #\u)) -(test-ssre "{Nd}" () numeric) -(test-ssre "{Nd}+" () (+ numeric)) -(test-ssre "{~Nd}+" () (+ (~ numeric))) -(test-ssre "^{Vowel}" () (: bos (or #\A #\E #\I #\O #\U #\Y #\W))) -(test-ssre "^{Any}X" () (: bos any #\X)) -(test-ssre "^{~Any}X" () (: bos (~ any) #\X)) -(test-ssre "^{Any}?X" () (: bos (? 
any) #\X)) -(test-ssre "{u}{l|d}*" () (: upper (* (or lower numeric)))) -(test-ssre "{~d}{an|[']}*" () (: (~ numeric) (* (or alnum #\')))) -(test-ssre "{<}{u&~Vowel|d}{!b}{an-d}*{>}" () (: bow (or (- upper (or #\A #\E #\I #\O #\U #\Y #\W)) numeric) nwb (* (- alnum numeric)) eow)) -(test-ssre "{}\\X*" () (: grapheme (* grapheme))) +; restore default definitions for sre tests +(ssre-definitions *ssre-definitions*) -;; options tests +; NOTE: reverse translations on the right are not the only correct ones; there can be equivalent translations, which are also correct -(test-ssre "A string" (i) (w/nocase (: #\A #\space #\s #\t #\r #\i #\n #\g))) -(test-ssre "([^.]*)\\.([^:]*):[T ]+(.*)" (i) (w/nocase (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* nonl))))) -(test-ssre "^[W-c]+$" (i) (w/nocase (: bos (+ (char-range #\W #\c)) eos))) -(test-ssre "\\A(.)*\\z" (s) (: bos (* ($ any)) eos)) -(test-ssre "[^a]" (i) (w/nocase (~ #\a))) -(test-ssre "(?i:saturday|sunday)" () (w/nocase (or (: #\s #\a #\t #\u #\r #\d #\a #\y) (: #\s #\u #\n #\d #\a #\y)))) -(test-ssre "(?i:a)b" (i) (w/nocase (: #\a #\b))) -(test-ssre "((?i:a))b" (i) (w/nocase (: ($ #\a) #\b))) -(test-ssre "(?-i:a)b" (i) (w/nocase (: (w/case #\a) #\b))) -(test-ssre "((?-i:a))b" (i) (w/nocase (: ($ (w/case #\a)) #\b))) -(test-ssre "(?-i:a)b" (i) (w/nocase (: (w/case #\a) #\b))) -(test-ssre "((?-i:a))b" () (: ($ #\a) #\b)) -(test-ssre "(?-i:a)b" () (: #\a #\b)) -(test-ssre "((?-i:a))b" () (: ($ #\a) #\b)) -(test-ssre "((?-i:a.))b" (i s) (w/nocase (: ($ (w/case (: #\a any))) #\b))) -;(test-ssre "^a(?#xxx){3}c" () (: bos "a" "{3}c")) -- (?#comments) not supported -(test-ssre "(?m)^b$" () (: bol #\b eol)) -(test-ssre "(?ms)^b." () (: bol #\b any)) -(test-ssre "([\\w:]+::)?(\\w+)$" (i) (w/nocase (: (? 
($ (: (+ (or alnum #\_ #\:)) #\: #\:))) ($ (+ (or alnum #\_))) eos))) -(test-ssre "(?x)x y z | a b c" () (or (: #\x #\y #\z) (: #\a #\b #\c))) -(test-ssre "(?i)AB(?-i:C)" () (w/nocase (: #\A #\B (w/case #\C)))) -(test-ssre "(?i)reg(?:ul(?:[a@]|ae)r|ex)" () (w/nocase (: #\r #\e #\g (or (: #\u #\l (or #\a #\@ (: #\a #\e)) #\r) (: #\e #\x))))) -(test-ssre "ab cd (?x: de fg)" () (: #\a #\b #\space #\c #\d #\space (: #\d #\e #\f #\g))) -(test-ssre "ab cd(?x: de fg) h" () (: #\a #\b #\space #\c #\d (: #\d #\e #\f #\g) #\space #\h)) -(test-ssre "^\\w+=.*(\\\\\n.*)*" (s) (: bos (+ (or alnum #\_)) #\= (* any) (* ($ (: #\\ #\newline (* any)))))) -(test-ssre "[^a]*" (i) (w/nocase (* (~ #\a)))) -(test-ssre "[^a]*?X" (i) (w/nocase (: (*? (~ #\a)) #\X))) -(test-ssre "[^a]+?X" (i) (w/nocase (: (**? 1 #f (~ #\a)) #\X))) -(test-ssre "[^a]?X" (i) (w/nocase (: (? (~ #\a)) #\X))) -(test-ssre "[^a]??X" (i) (w/nocase (: (?? (~ #\a)) #\X))) -(test-ssre "[^a]{2,3}" (i) (w/nocase (** 2 3 (~ #\a)))) -(test-ssre "[^a]{2,3}?" (i) (w/nocase (**? 2 3 (~ #\a)))) -(test-ssre "(?<=a{2})b" (i) (w/nocase (: (look-behind (= 2 #\a)) #\b))) -(test-ssre "(?= 8 (* (or (~ alpha) #\a #\*)))) -(test-ssre "(?i)abc" (i) (w/nocase (: #\a #\b #\c))) -(test-ssre "(?-i)the end" (i) (w/nocase (w/case (: #\t #\h #\e #\space #\e #\n #\d)))) ; optimise? -(test-ssre "([\\da-f:]+)$" (i) (w/nocase (: ($ (+ (or numeric (char-range #\a #\f) #\:))) eos))) -(test-ssre "^[\\da-f](\\.[\\da-f])*$" (i) (w/nocase (: bos (or numeric (char-range #\a #\f)) (* ($ (: #\. (or numeric (char-range #\a #\f))))) eos))) -(test-ssre "([^.]*)\\.([^:]*):[T ]+(.*)" (i s) (w/nocase (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* any))))) -(test-ssre "([^.]*)\\.([^:]*):[T ]+(.*)" (i s n) (w/nocase (: (* (~ #\.)) #\. (* (~ #\:)) #\: (+ (or #\T #\space)) (* any)))) -(test-ssre "^[W-c]+$" (i) (w/nocase (: bos (+ (char-range #\W #\c)) eos))) -(test-ssre "^[\x3f;-\x5F;]+$" (i) (w/nocase (: bos (+ (char-range #\? 
#\_)) eos))) -(test-ssre "[^a]" (i) (w/nocase (~ #\a))) -(test-ssre "[^a]+" (i) (w/nocase (+ (~ #\a)))) -(test-ssre "[^az]" (i) (w/nocase (~ (or #\a #\z)))) -(test-ssre "\\b(foo)\\s+(\\w+)" (i) (w/nocase (: (or bow eow) ($ (: #\f #\o #\o)) (+ space) ($ (+ (or alnum #\_)))))) -(test-ssre "a(?i:b)c" () (: #\a (w/nocase #\b) #\c)) -(test-ssre "a(?i:b)*c" () (: #\a (* (w/nocase #\b)) #\c)) -(test-ssre "^(?-u:\\w\\s*\\w)$" (i m) (w/nocase (: bol (w/ascii (: (or alnum #\_) (* space) (or alnum #\_))) eol))) -(test-ssre "abc\\X*" (i) (w/nocase (: #\a #\b #\c (* grapheme)))) -(test-ssre "((((((((((a))))))))))" (n) #\a) -(test-ssre "((((((((?-n:(a)))))))))" (n) ($ #\a)) +(test-sre (: #\t #\h #\e #\space #\q #\u #\i #\c #\k #\space #\b #\r #\o #\w #\n #\space #\f #\o #\x) "the quick brown fox") +(test-sre (: (* #\a) #\a #\b (? #\c) #\x #\y (+ #\z) #\p #\q (= 3 #\r) #\a (>= 2 #\b) #\x (** 4 5 #\y) #\p (** 0 6 #\q) #\A (>= 0 #\B) #\z #\z) "a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB*zz") +(test-sre (: bos (** 1 2 ($ (: #\a #\b #\c))) #\z #\z) "^(abc){1,2}zz") +(test-sre (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c) "^(b+?|a){1,2}?c") +(test-sre (: bos (** 1 2 ($ (or (+ #\b) #\a))) #\c) "^(b+|a){1,2}c") +(test-sre (: bos (**? 1 2 ($ (or (: #\b #\a) (* #\b)))) #\b #\c) "^(ba|b*){1,2}?bc") +(test-sre (: bos (or #\a #\b #\] #\c #\d #\e)) "^[ab\\]cde]") +(test-sre (: bos (or #\] #\c #\d #\e)) "^[\\]cde]") +(test-sre (: bos (~ (or #\a #\b #\] #\c #\d #\e))) "^[^ab\\]cde]") +(test-sre (: bos (~ (or #\] #\c #\d #\e))) "^[^\\]cde]") +(test-sre (: bos #\@) "^@") +(test-sre (: bos (+ (char-range #\0 #\9)) eos) "^[0-9]+$") +(test-sre (: bos (* nonl) #\n #\t #\e #\r) "^.*nter") +(test-sre (: bos #\x #\x #\x (+ (char-range #\0 #\9)) eos) "^xxx[0-9]+$") +(test-sre (: bos (+ nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos) "^.+[0-9][0-9][0-9]$") +(test-sre (: bos (**? 
1 #f nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos) "^.+?[0-9][0-9][0-9]$") +(test-sre (: bos ($ (+ (~ #\!))) #\! ($ (+ nonl)) #\= #\a #\p #\q #\u #\x #\z #\. #\i #\x #\r #\. #\z #\z #\z #\. #\a #\c #\. #\u #\k eos) "^([^!]+)!(.+)=apquxz\\.ixr\\.zzz\\.ac\\.uk$") +(test-sre #\: ":") +(test-sre (: ($ (+ (or numeric (char-range #\a #\f) #\:))) eos) "([\\da-f:]+)$") +(test-sre (: bos (* nonl) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) eos) "^.*\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$") +(test-sre (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos) "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$") +(test-sre (: bos (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-)) (* ($ (: #\. (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-))))) #\. eos) "^[a-zA-Z\\d][a-zA-Z\\d\\-]*(\\.[a-zA-Z\\d][a-zA-Z\\d\\-]*)*\\.$") +(test-sre (: bos #\* #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric))))) (* ($ (: #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric)))))))) eos) "^\\*\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?(\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?)*$") +(test-sre (: bos (look-ahead (: #\a #\b ($ (: #\d #\e)))) ($ (: #\a #\b #\d)) ($ #\e)) "^(?=ab(de))(abd)(e)") +(test-sre (: bos (neg-look-ahead (or (: ($ (: #\a #\b)) #\d #\e) #\x)) ($ (: #\a #\b #\d)) ($ #\f)) "^(?!(ab)de|x)(abd)(f)") +(test-sre (: bos (look-ahead ($ (: #\a #\b ($ (: #\c #\d))))) ($ (: #\a #\b))) "^(?=(ab(cd)))(ab)") +(test-sre (: bos (or numeric (char-range #\a #\f)) (* ($ (: #\. (or numeric (char-range #\a #\f))))) eos) "^[\\da-f](\\.[\\da-f])*$") +(test-sre (: bos #\" (* nonl) #\" (* space) (? 
($ (: #\; (* nonl)))) eos) "^\".*\"\\s*(;.*)?$") +(test-sre (: bos eos) "^$") +(test-sre (: bos #\a #\space #\b (or #\c #\space) #\d eos) "^a b[c ]d$") +(test-sre (: bos ($ (: #\a ($ (: #\b ($ #\c))))) ($ (: #\d ($ (: #\e ($ #\f))))) ($ (: #\h ($ (: #\i ($ #\j))))) ($ (: #\k ($ (: #\l ($ #\m))))) eos) "^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$") +(test-sre (: bos (: #\a ($ (: #\b ($ #\c)))) (: #\d ($ (: #\e ($ #\f)))) (: #\h ($ (: #\i ($ #\j)))) (: #\k ($ (: #\l ($ #\m)))) eos) "^a(b(c))d(e(f))h(i(j))k(l(m))$") +(test-sre (: bos (or alnum #\_) (~ (or alnum #\_)) space (~ space) numeric (~ numeric) #\]) "^\\w\\W\\s\\S\\d\\D\\]") +(test-sre (: bos (+ (or #\. #\^ #\$ #\| #\( #\) #\* #\+ #\? #\{ #\, #\}))) "^[.\\^$|()*+?{,}]+") +(test-sre (: bos (* #\a) (or alnum #\_)) "^a*\\w") +(test-sre (: bos (*? #\a) (or alnum #\_)) "^a*?\\w") +(test-sre (: bos (+ #\a) (or alnum #\_)) "^a+\\w") +(test-sre (: bos (**? 1 #f #\a) (or alnum #\_)) "^a+?\\w") +(test-sre (: bos (= 8 numeric) (>= 2 (or alnum #\_))) "^\\d{8}\\w{2,}") +(test-sre (: bos (** 4 5 (or #\a #\e #\i #\o #\u numeric)) eos) "^[aeiou\\d]{4,5}$") +(test-sre (: bos (**? 4 5 (or #\a #\e #\i #\o #\u numeric))) "^[aeiou\\d]{4,5}?") +(test-sre (: bos #\F #\r #\o #\m (+ #\space) ($ (+ (~ #\space))) (+ #\space) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (+ #\space) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (or (char-range #\a #\z) (char-range #\A #\Z)) (+ #\space) (? 
(char-range #\0 #\9)) (char-range #\0 #\9) (+ #\space) (char-range #\0 #\9) (char-range #\0 #\9) #\: (char-range #\0 #\9) (char-range #\0 #\9)) "^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]") +(test-sre (: bos #\F #\r #\o #\m (+ space) (+ (~ space)) (+ space) (= 2 ($ (: (= 3 (or (char-range #\a #\z) (char-range #\A #\Z))) (+ space)))) (** 1 2 numeric) (+ space) numeric numeric #\: numeric numeric) "^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d") +(test-sre (: bos #\1 #\2 nonl #\3 #\4) "^12.34") +(test-sre (: #\f #\o #\o (neg-look-ahead (: #\b #\a #\r)) ($ (* nonl))) "foo(?!bar)(.*)") +(test-sre (: (or (: (neg-look-ahead (: #\f #\o #\o)) nonl nonl nonl) (: bos (** 0 2 nonl))) #\b #\a #\r ($ (* nonl))) "(?:(?!foo)...|^.{0,2})bar(.*)") +(test-sre (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3))) "^(\\D*)(?=\\d)(?!123)") +(test-sre (: (neg-look-ahead bos) #\a #\b #\c) "{!= 8 numeric) #\@ (+ nonl) (~ #\k) eos) "^\\d{8,}@.+[^k]$") +(test-sre (~ #\a) "[^a]") +(test-sre (~ (or #\a #\z)) "[^az]") +(test-sre (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 6 (~ #\*)) #\L #\L) "P[^*]TAIRE[^*]{1,6}?LL") +(test-sre (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 #f (~ #\*)) #\L #\L) "P[^*]TAIRE[^*]+?LL") +(test-sre (: ($ (: #\. numeric numeric (? (char-range #\1 #\9)))) (+ numeric)) "(\\.\\d\\d[1-9]?)\\d+") +(test-sre ($ (: #\. numeric numeric ($ (or (look-ahead #\0) (: numeric (look-ahead numeric)))))) "(\\.\\d\\d((?=0)|\\d(?=\\d)))") +(test-sre (: (or bow eow) ($ (: #\f #\o #\o)) (+ space) ($ (+ (or alnum #\_)))) "\\b(foo)\\s+(\\w+)") +(test-sre (: #\f #\o #\o ($ (* nonl)) #\b #\a #\r) "foo(.*)bar") +(test-sre (: #\f #\o #\o ($ (*? nonl)) #\b #\a #\r) "foo(.*?)bar") +(test-sre (: ($ (* nonl)) ($ (* numeric))) "(.*)(\\d*)") +(test-sre (: ($ (* nonl)) ($ (+ numeric))) "(.*)(\\d+)") +(test-sre (: ($ (*? nonl)) ($ (* numeric))) "(.*?)(\\d*)") +(test-sre (: ($ (*? 
nonl)) ($ (+ numeric))) "(.*?)(\\d+)") +(test-sre (: ($ (* nonl)) ($ (+ numeric)) eos) "(.*)(\\d+)$") +(test-sre (: ($ (*? nonl)) ($ (+ numeric)) eos) "(.*?)(\\d+)$") +(test-sre (: ($ (* nonl)) (or bow eow) ($ (+ numeric)) eos) "(.*)\\b(\\d+)$") +(test-sre (: ($ (: (* nonl) (~ numeric))) ($ (+ numeric)) eos) "(.*\\D)(\\d+)$") +(test-sre (: bos (* (~ numeric)) (neg-look-ahead (: #\1 #\2 #\3))) "^\\D*(?!123)") +(test-sre (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3))) "^(\\D*)(?=\\d)(?!123)") +(test-sre (: bos (or #\W #\-) #\4 #\6 #\]) "^[W\\-]46\\]") +(test-sre (: bos (or (char-range #\W #\]) #\4 #\6)) "^[W-\\]46]") +(test-sre (: #\w #\o #\r #\d #\space (** 0 10 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d) "word (?:[a-zA-Z0-9]+ ){0,10}otherword") +(test-sre (: #\w #\o #\r #\d #\space (** 0 300 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d) "word (?:[a-zA-Z0-9]+ ){0,300}otherword") +(test-sre (: bos (= 0 ($ #\a))) "^(a){0}") +(test-sre (: bos (** 0 1 ($ #\a))) "^(a)?") +(test-sre (: bos (** 0 2 ($ #\a))) "^(a){0,2}") +(test-sre (: bos (** 0 3 ($ #\a))) "^(a){0,3}") +(test-sre (: bos (>= 0 ($ #\a))) "^(a)*") +(test-sre (: bos (= 1 ($ #\a))) "^(a)") +(test-sre (: bos (** 1 2 ($ #\a))) "^(a){1,2}") +(test-sre (: bos (** 1 3 ($ #\a))) "^(a){1,3}") +(test-sre (: bos (>= 1 ($ #\a))) "^(a)+") +(test-sre (: (* nonl) #\. #\g #\i #\f) ".*\\.gif") +(test-sre (: (>= 0 nonl) #\. 
#\g #\i #\f) ".*\\.gif") +(test-sre (: (* nonl) eos) ".*$") +(test-sre ($ (or (: (* nonl) #\X) (: bos #\B))) "(.*X|^B)") +(test-sre (: bos (* nonl) #\B) "^.*B") +(test-sre (: bos (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9)) "^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]") +(test-sre (: bos numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric numeric) "^\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d") +(test-sre (: bos (= 12 (or #\a #\b #\c))) "^[abc]{12}") +(test-sre (: bos (= 12 (char-range #\a #\c))) "^[a-c]{12}") +(test-sre (: bos (= 12 ($ (or #\a #\b #\c)))) "^([abc]){12}") +(test-sre (: bos (or #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)) "^[abcdefghijklmnopqrstuvwxy0123456789]") +(test-sre (: #\a #\b #\c #\d (= 0 #\e)) "abcde{0}") +(test-sre (: #\a #\b (= 0 (or #\c #\d)) #\e) "ab[cd]{0}e") +(test-sre (: #\a #\b (= 0 ($ #\c)) #\d) "ab(c){0}d") +(test-sre (: #\a ($ (* #\b))) "a(b*)") +(test-sre (: #\a #\b (= 0 numeric) #\e) "ab\\d{0}e") +(test-sre (: #\" (* ($ (or (+ (~ (or #\\ #\"))) (: #\\ nonl)))) #\") "\"([^\\\\\"]+|\\\\.)*\"") +(test-sre (*? nonl) ".*?") +(test-sre (or bow eow) "\\b") +(test-sre (or bow eow) "\\b") +(test-sre (: #\a (~ #\a) #\b) "a[^a]b") +(test-sre (: #\a nonl #\b) "a.b") +(test-sre (: #\a (~ #\a) #\b) "a[^a]b") +(test-sre (: #\a nonl #\b) "a.b") +(test-sre (: bos (**? 1 2 ($ (or (**? 1 #f #\b) #\a))) #\c) "^(b+?|a){1,2}?c") +(test-sre (: bos (**? 
1 2 ($ (or (+ #\b) #\a))) #\c) "^(b+|a){1,2}?c") +(test-sre (: (neg-look-ahead bos) #\x) "{!= 0 #\b) #\b #\c) "ab*bc") +(test-sre (: #\a (+ #\b) #\b #\c) "ab+bc") +(test-sre (: #\a (>= 1 #\b) #\b #\c) "ab+bc") +(test-sre (: #\a (+ #\b) #\b #\c) "ab+bc") +(test-sre (: #\a (>= 1 #\b) #\b #\c) "ab+bc") +(test-sre (: #\a (** 1 3 #\b) #\b #\c) "ab{1,3}bc") +(test-sre (: #\a (** 3 4 #\b) #\b #\c) "ab{3,4}bc") +(test-sre (: #\a (** 4 5 #\b) #\b #\c) "ab{4,5}bc") +(test-sre (: #\a (? #\b) #\b #\c) "ab?bc") +(test-sre (: #\a (** 0 1 #\b) #\b #\c) "ab?bc") +(test-sre (: #\a (? #\b) #\b #\c) "ab?bc") +(test-sre (: #\a (? #\b) #\c) "ab?c") +(test-sre (: #\a (** 0 1 #\b) #\c) "ab?c") +(test-sre (: bos #\a #\b #\c eos) "^abc$") +(test-sre (: bos #\a #\b #\c) "^abc") +(test-sre (: bos #\a #\b #\c eos) "^abc$") +(test-sre (: #\a #\b #\c eos) "abc$") +(test-sre bos "^") +(test-sre eos "$") +(test-sre (: #\a nonl #\c) "a.c") +(test-sre (: #\a (* nonl) #\c) "a.*c") +(test-sre (: #\a (or #\b #\c) #\d) "a[bc]d") +(test-sre (: #\a (char-range #\b #\d) #\e) "a[b-d]e") +(test-sre (: #\a (char-range #\b #\d)) "a[b-d]") +(test-sre (: #\a (or #\- #\b)) "a[\\-b]") +(test-sre (: #\a (or #\b #\-)) "a[b\\-]") +(test-sre (: #\a #\]) "a\\]") +(test-sre (: #\a #\] #\b) "a\\]b") +(test-sre (: #\a (~ (or #\b #\c)) #\d) "a[^bc]d") +(test-sre (: #\a (~ (or #\- #\b)) #\c) "a[^\\-b]c") +(test-sre (: #\a (~ (or #\] #\b)) #\c) "a[^\\]b]c") +(test-sre (: (or bow eow) #\a (or bow eow)) "\\ba\\b") +(test-sre (: (or bow eow) #\y (or bow eow)) "\\by\\b") +(test-sre (: nwb #\a nwb) "\\Ba\\B") +(test-sre (: nwb #\y (or bow eow)) "\\By\\b") +(test-sre (: (or bow eow) #\y nwb) "\\by\\B") +(test-sre (: nwb #\y nwb) "\\By\\B") +(test-sre (or alnum #\_) "\\w") +(test-sre (~ (or alnum #\_)) "\\W") +(test-sre (: #\a space #\b) "a\\sb") +(test-sre (: #\a (~ space) #\b) "a\\Sb") +(test-sre numeric "\\d") +(test-sre (~ numeric) "\\D") +(test-sre (or (: #\a #\b) (: #\c #\d)) "ab|cd") +(test-sre (: ($ (:)) #\e #\f) "()ef") 
+(test-sre (: eos #\b) "$b") +(test-sre (: #\a #\( #\b) "a\\(b") +(test-sre (: #\a (* #\() #\b) "a\\(*b") +(test-sre (: #\a #\\ #\b) "a\\\\b") +(test-sre ($ ($ #\a)) "((a))") +(test-sre (: ($ #\a) #\b ($ #\c)) "(a)b(c)") +(test-sre (: (+ #\a) (+ #\b) #\c) "a+b+c") +(test-sre (: (>= 1 #\a) (>= 1 #\b) #\c) "a+b+c") +(test-sre (: #\a (**? 1 #f nonl) #\c) "a.+?c") +(test-sre (* ($ (or (+ #\a) #\b))) "(a+|b)*") +(test-sre (>= 0 ($ (or (+ #\a) #\b))) "(a+|b)*") +(test-sre (+ ($ (or (+ #\a) #\b))) "(a+|b)+") +(test-sre (>= 1 ($ (or (+ #\a) #\b))) "(a+|b)+") +(test-sre (? ($ (or (+ #\a) #\b))) "(a+|b)?") +(test-sre (** 0 1 ($ (or (+ #\a) #\b))) "(a+|b)?") +(test-sre (* (~ (or #\a #\b))) "[^ab]*") +(test-sre (: #\a #\b #\c) "abc") +(test-sre (* #\a) "a*") +(test-sre (: (* ($ (or #\a #\b #\c))) #\d) "([abc])*d") +(test-sre (: (* ($ (or #\a #\b #\c))) #\b #\c #\d) "([abc])*bcd") +(test-sre (or #\a #\b #\c #\d #\e) "[abcde]") +(test-sre (: ($ (or #\a #\b #\c #\d #\e)) #\f) "([abcde])f") +(test-sre (: #\a #\b #\c (* #\d) #\e #\f #\g) "abcd*efg") +(test-sre (: #\a (* #\b)) "ab*") +(test-sre (: ($ (or (: #\a #\b) (: #\c #\d))) #\e) "(ab|cd)e") +(test-sre (: (or #\a #\b #\h #\g #\e #\f #\d #\c) #\i #\j) "[abhgefdc]ij") +(test-sre (: bos ($ (or (: #\a #\b) (: #\c #\d))) #\e) "^(ab|cd)e") +(test-sre (: ($ (or (: #\a #\b #\c) (:))) #\e #\f) "(abc|)ef") +(test-sre (: ($ (or #\a #\b)) (* #\c) #\d) "([ab])c*d") +(test-sre (: ($ (or (: #\a #\b) (: #\a (* #\b)))) #\b #\c) "(ab|ab*)bc") +(test-sre (: #\a ($ (* (or #\b #\c))) (* #\c)) "a([bc]*)c*") +(test-sre (: #\a ($ (* (or #\b #\c))) ($ (: (* #\c) #\d))) "a([bc]*)(c*d)") +(test-sre (: #\a ($ (+ (or #\b #\c))) ($ (: (* #\c) #\d))) "a([bc]+)(c*d)") +(test-sre (: #\a ($ (* (or #\b #\c))) ($ (: (+ #\c) #\d))) "a([bc]*)(c+d)") +(test-sre (: #\a (* (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e) "a[bcd]*dcdcde") +(test-sre (: #\a (+ (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e) "a[bcd]+dcdcde") +(test-sre (: ($ (or (: #\a #\b) #\a)) (* #\b) #\c) 
"(ab|a)b*c") +(test-sre (: ($ (: ($ #\a) ($ #\b) #\c)) ($ #\d)) "((a)(b)c)(d)") +(test-sre (: (or (char-range #\a #\z) (char-range #\A #\Z) #\_) (* (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9) #\_))) "[a-zA-Z_][a-zA-Z0-9_]*") +(test-sre (or (: bos #\a ($ (or (: #\b (+ #\c)) (: #\b (or #\e #\h)))) #\g) (: nonl #\h eos)) "^a(bc+|b[eh])g|.h$") +(test-sre ($ (or (: #\b (+ #\c) #\d eos) (: #\e (* #\f) #\g nonl) (: (? #\h) #\i ($ (or #\j #\k))))) "(bc+d$|ef*g.|h?i([jk]))") +(test-sre ($ ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a)))))))))) "((((((((((a))))))))))") +(test-sre ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a))))))))) "(((((((((a)))))))))") +(test-sre (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s #\space #\o #\f #\space #\t #\e #\x #\t) "multiple words of text") +(test-sre (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s) "multiple words") +(test-sre (: ($ (* nonl)) #\c ($ (* nonl))) "(.*)c(.*)") +(test-sre (: #\( ($ (* nonl)) #\, #\space ($ (* nonl)) #\)) "\\((.*), (.*)\\)") +(test-sre #\k "k") +(test-sre (: #\a #\b #\c #\d) "abcd") +(test-sre (: #\a ($ (: #\b #\c)) #\d) "a(bc)d") +(test-sre (: #\a (? #\-) #\c) "a-?c") +(test-sre (: #\a (neg-look-ahead #\b) nonl) "a(?!b).") +(test-sre (: #\a (look-ahead #\d) nonl) "a(?=d).") +(test-sre (: #\a (look-ahead (or #\c #\d)) nonl) "a(?=[cd]).") +(test-sre (: #\a (or #\b #\c #\d) ($ nonl)) "a[bcd](.)") +(test-sre (: #\a (* (or #\b #\c #\d)) ($ nonl)) "a[bcd]*(.)") +(test-sre (: #\a (**? 1 #f (or #\b #\c #\d)) ($ nonl)) "a[bcd]+?(.)") +(test-sre (: #\a (+ (or #\b #\c #\d)) ($ nonl)) "a[bcd]+(.)") +(test-sre (: #\a (= 2 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{2}(.)") +(test-sre (: #\a (** 4 5 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{4,5}(.)") +(test-sre (: #\a (**? 4 5 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{4,5}?(.)") +(test-sre (: #\a (** 6 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{6,7}(.)") +(test-sre (: #\a (**? 
6 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{6,7}?(.)") +(test-sre (: #\a (** 5 6 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,6}(.)") +(test-sre (: #\a (**? 5 6 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,6}?(.)") +(test-sre (: #\a (** 5 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,7}(.)") +(test-sre (: #\a (**? 5 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,7}?(.)") +(test-sre (: #\a (**? 1 #f (or #\b (**? 1 2 ($ (or #\c #\e))) #\d)) ($ nonl)) "a(?:b|([ce]){1,2}?|d)+?(.)") +(test-sre (: bos (? ($ (+ nonl))) #\B) "^(.+)?B") +(test-sre (or (: bos ($ (~ (char-range #\a #\z)))) (: ($ #\^) eos)) "^([^a-z])|(\\^)$") +(test-sre (: bos (or #\< #\>) #\&) "^[<>]&") +(test-sre (: (look-behind #\a) #\b) "(?<=a)b") +(test-sre (: (neg-look-behind #\c) #\b) "(? (+ #\a))) #\a #\b) "(>a+)ab") +(test-sre (: #\b eos) "b$") +(test-sre (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)))) #\f #\o #\o) "(?<=\\d{3}(?!999))foo") +(test-sre (: (look-behind (: (neg-look-ahead (: nonl nonl nonl #\9 #\9 #\9)) (= 3 numeric))) #\f #\o #\o) "(?<=(?!...999)\\d{3})foo") +(test-sre (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)) nonl nonl nonl)) #\f #\o #\o) "(?<=\\d{3}(?!999)...)foo") +(test-sre (: (look-behind (: (= 3 numeric) nonl nonl nonl)) (neg-look-behind (: #\9 #\9 #\9)) #\f #\o #\o) "(?<=\\d{3}...)(?= 0 #\b) (or alnum #\_)) "a*b*\\w") +(test-sre (: (* #\a) (* numeric) (or alnum #\_)) "a*\\d*\\w") +(test-sre (: (* #\a) (* #\b) (or alnum #\_)) "a*b*\\w") +(test-sre (: eos (neg-look-behind #\newline)) "$(?= 2 ($ (** 2 3 #\a)))) #\a) "^(?:(a{2,3}){2,})+a") +(test-sre (look-ahead #\C) "(?=C)") +(test-sre (: (or (: #\a (-> quote (or (: #\space (-> apostrophe #\')) (-> realquote #\"))) #\space) (: #\b (-> quote (or (: #\space (-> apostrophe #\')) (-> realquote #\"))) #\space)) #\space ($ (or (: (backref quote) (+ (char-range #\a #\z))) (+ (char-range #\0 #\9))))) "(?:a(? (?')|(?\")) |b(? 
(?')|(?\")) ) (\\k[a-z]+|[0-9]+)") +(test-sre (: bos (+ (>= 2 ($ #\a))) ($ (or alnum #\_))) "^(?:(a){2,})+(\\w)") +(test-sre (: bos (+ (>= 2 #\a)) ($ (or alnum #\_))) "^(?:a{2,})+(\\w)") +(test-sre (: bos (*? nonl) ($ (or #\a (: #\b #\c)))) "^.*?(a|bc)") +(test-sre (: bos (*? nonl) (or #\a (: #\b #\c) #\d)) "^.*?(?:a|bc|d)") +(test-sre (: (*? nonl) #\a (look-behind (: #\b #\a))) ".*?a(?<=ba)") +(test-sre (or (: #\a (look-ahead (: #\b #\c)) nonl) (: #\a #\b #\d)) "a(?=bc).|abd") +(test-sre (: bos (*? nonl) (or #\a (: #\b #\c))) "^.*?(?:a|bc)") +(test-sre (: bos (* numeric) (= 4 (or alnum #\_))) "^\\d*\\w{4}") +(test-sre (: bos (* (~ #\b)) (= 4 (or alnum #\_))) "^[^b]*\\w{4}") +(test-sre (: bos (* #\a) (= 4 (or alnum #\_))) "^a*\\w{4}") +(test-sre (: (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n)) "(?:(?foo)|(?bar))\\k") +(test-sre (: (-> n #\A) (or (-> n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n)) "(?A)(?:(?foo)|(?bar))\\k") +(test-sre (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos) "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$") +(test-sre (: (or #\x (or (+ ($ (or (: #\x #\x) (: #\y #\y)))) #\x #\x #\x #\x #\x) #\a #\a #\a) #\b #\c) "(?:x|(xx|yy)+|[xxxxxaaa])bc") +(test-sre (: space #\a #\b #\c) "\\sabc") +(test-sre (: (* #\Z) (= 216 ($ (or (:) (* #\d))))) "Z*(|d*){216}") +(test-sre (: (look-behind (: #\a (= 0 ($ #\B)) #\c)) #\X) "(?<=a(B){0}c)X") +(test-sre (: (+ #\a) (or (:) #\b) #\a) "a+(?:|b)a") +(test-sre (: (? #\X) (= 3335 ($ (or #\R (:) (:))))) "X?(R||){3335}") +(test-sre (or (: (neg-look-ahead ($ #\b)) #\c) #\b) "(?!(b))c|b") +(test-sre (or (: (look-ahead ($ #\b)) #\b) #\c) "(?=(b))b|c") +(test-sre (: #\< (or #\a #\space #\b) #\>) "<[a b]>") +(test-sre (+ (or numeric #\-)) "[\\d\\-]+") +(test-sre (look-behind (? 
(look-ahead nonl))) "(?<=(?=.)?)") +(test-sre (look-behind (** 4 5 (look-ahead nonl))) "(?<=(?=.){4,5})") +(test-sre (look-behind (: (** 4 5 (look-ahead nonl)) #\x)) "(?<=(?=.){4,5}x)") +(test-sre (: #\space #\space #\space (* (-> word (: #\space (+ (or alnum #\_)) #\space))) #\space #\space #\space #\space #\. #\space #\space #\space) " (? \\w+ )* \\. ") +(test-sre (look-behind (look-ahead (: nonl (look-behind #\x)))) "(?<=(?=.(?<=x)))") +(test-sre (: (look-behind (look-ahead (look-behind #\a))) #\b) "(?<=(?=(?<=a)))b") +(test-sre (: (look-behind (: #\a (? #\b) #\c)) nonl nonl nonl) "(?<=ab?c)...") +(test-sre (: (look-behind (or (: #\P #\Q #\R) (: #\a (? #\b) #\c))) nonl nonl nonl) "(?<=PQR|ab?c)...") +(test-sre (: (look-behind (or (: #\a (? #\b) #\c) (: #\P #\Q #\R))) nonl nonl nonl) "(?<=ab?c|PQR)...") +(test-sre (: (look-behind (or (: #\P #\Q) (: #\a (? #\b) #\c))) nonl nonl nonl) "(?<=PQ|ab?c)...") +(test-sre (: (look-behind (or (: #\a (? #\b) #\c) (: #\P #\Q))) nonl nonl nonl) "(?<=ab?c|PQ)...") +(test-sre (: (look-behind (: #\a ($ (or (: (? #\b) #\c) (: (? #\d) (? #\e) #\e))) #\f)) #\X nonl) "(?<=a(b?c|d?e?e)f)X.") +(test-sre (: (neg-look-behind (: #\a ($ (or (: (? #\b) #\c) (: (? #\d) (? #\e) #\e))) #\f)) #\X nonl) "(?= 5 (char-range #\a #\z)) #\b) #\x) "[a-z]{5,}b|x") +(test-sre (or (: (**? 
1 6 (char-range #\a #\z)) #\s) #\x) "[a-z]{1,6}?s|x") +(test-sre #\@ "@") +(test-sre (: #\@ #\@ #\@ #\x #\x #\x) "@@@xxx") +(test-sre (: (look-behind #\x) #\b #\a #\d #\u #\t #\f) "(?<=x)badutf") +(test-sre (: (look-behind (: #\x #\x)) #\b #\a #\d #\u #\t #\f) "(?<=xx)badutf") +(test-sre (: (look-behind (: #\x #\x #\x #\x)) #\b #\a #\d #\u #\t #\f) "(?<=xxxx)badutf") +(test-sre #\X "X") +(test-sre (+ #\a) "a+") +(test-sre #\A "A") +(test-sre #\x "x") +(test-sre (: #\a #\b #\c) "abc") +(test-sre #\X "X") +(test-sre (: (look-behind nonl) #\X) "(?<=.)X") +(test-sre (+ #\a) "a+") +(test-sre #\a "a") +(test-sre nonl ".") +(test-sre #\s "s") +(test-sre (~ #\s) "[^s]") +(test-sre (: #\a (*? nonl) #\a) "a.*?a") +(test-sre (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z))) "(?<=pqr)abc(?=xyz)") +(test-sre (: #\a (or bow eow)) "a\\b") +(test-sre (: #\a #\b #\c (look-ahead (: #\a #\b #\c #\d #\e)) (look-ahead (: #\a #\b))) "abc(?=abcde)(?=ab)") +(test-sre (: (look-behind (: #\a #\b #\c)) #\1 #\2 #\3) "(?<=abc)123") +(test-sre (: (or bow eow) #\a #\b #\c (or bow eow)) "\\babc\\b") +(test-sre (: (look-behind (: #\a #\b #\c)) #\d #\e #\f) "(?<=abc)def") +(test-sre (: #\a #\b #\c (look-behind (: #\b #\c)) #\d #\e #\f) "abc(?<=bc)def") +(test-sre (: (look-behind (: #\a #\b)) #\c #\d #\e #\f) "(?<=ab)cdef") +(test-sre (: #\b (neg-look-behind (: #\a #\x)) (neg-look-ahead (: #\c #\x))) "b(? A (or (: #\t #\o #\m) (: #\b #\o #\n))) #\- (backref A)) "(?tom|bon)-\\k") +(test-sre (: #\X (** 2 4 #\a) #\b) "Xa{2,4}b") +(test-sre (: #\X (**? 2 4 #\a) #\b) "Xa{2,4}?b") +(test-sre (: #\X (+ (** 2 4 #\a)) #\b) "X(?:a{2,4})+b") +(test-sre (: #\X (** 2 4 numeric) #\b) "X\\d{2,4}b") +(test-sre (: #\X (**? 2 4 numeric) #\b) "X\\d{2,4}?b") +(test-sre (: #\X (+ (** 2 4 numeric)) #\b) "X(?:\\d{2,4})+b") +(test-sre (: #\X (** 2 4 (~ numeric)) #\b) "X\\D{2,4}b") +(test-sre (: #\X (**? 
2 4 (~ numeric)) #\b) "X\\D{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (~ numeric))) #\b) "X(?:\\D{2,4})+b") +(test-sre (: #\X (** 2 4 (or #\a #\b #\c)) #\b) "X[abc]{2,4}b") +(test-sre (: #\X (**? 2 4 (or #\a #\b #\c)) #\b) "X[abc]{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (or #\a #\b #\c))) #\b) "X(?:[abc]{2,4})+b") +(test-sre (: #\X (** 2 4 (~ #\a)) #\b) "X[^a]{2,4}b") +(test-sre (: #\X (**? 2 4 (~ #\a)) #\b) "X[^a]{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (~ #\a))) #\b) "X(?:[^a]{2,4})+b") +(test-sre (: #\Z (neg-look-ahead (:))) "Z(?!)") +(test-sre (: #\d #\o #\g (? ($ (: #\s #\b #\o #\d #\y)))) "dog(sbody)?") +(test-sre (: #\d #\o #\g (?? ($ (: #\s #\b #\o #\d #\y)))) "dog(sbody)??") +(test-sre (or (: #\d #\o #\g) (: #\d #\o #\g #\s #\b #\o #\d #\y)) "dog|dogsbody") +(test-sre (or (: #\d #\o #\g #\s #\b #\o #\d #\y) (: #\d #\o #\g)) "dogsbody|dog") +(test-sre (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow)) "\\bthe cat\\b") +(test-sre (: #\a #\b #\c) "abc") +(test-sre (: (look-behind (: #\a #\b #\c)) #\1 #\2 #\3) "(?<=abc)123") +(test-sre (: (or bow eow) #\a #\b #\c (or bow eow)) "\\babc\\b") +(test-sre (: (? #\a) (? #\b)) "a?b?") +(test-sre (: bos (? #\a) (? 
#\b)) "^a?b?") +(test-sre (: #\a #\b #\c (* #\d)) "abcd*") +(test-sre (: #\a #\b #\c (* numeric)) "abc\\d*") +(test-sre (: #\a #\b #\c (* (or #\d #\e))) "abc[de]*") +(test-sre (: (look-behind (: #\a #\b #\c)) #\d #\e #\f) "(?<=abc)def") +(test-sre (: #\a #\b #\c eos) "abc$") +(test-sre (: #\a #\b #\c (or bow eow)) "abc\\b") +(test-sre (: #\a #\b #\c nwb) "abc\\B") +(test-sre (+ nonl) ".+") +(test-sre (: (look-behind (+ ($ (: #\a #\b #\c)))) #\X) "(?<=(abc)+)X") +(test-sre (or (: ($ #\a) #\b) (: #\a #\c)) "(a)b|ac") +(test-sre (or (: ($ #\a) ($ #\b) #\x) (: #\a #\b #\c)) "(a)(b)x|abc") +(test-sre (: (or ($ (: #\f #\o #\o)) ($ (: #\b #\a #\r)) ($ (: #\b #\a #\z))) #\X) "(?:(foo)|(bar)|(baz))X") +(test-sre (or (: ($ (: #\a #\b)) #\x) (: #\a #\b)) "(ab)x|ab") +(test-sre ($ ($ ($ ($ ($ #\a))))) "(((((a)))))") +(test-sre (: (*? #\a) (*? #\b)) "a*?b*?") +(test-sre (: #\a #\b #\c) "abc") +(test-sre (: #\a ($ #\b) #\c) "a(b)c") +(test-sre (or (: ($ #\a) ($ #\b)) ($ #\c)) "(a)(b)|(c)") +(test-sre (or (-> A #\a) (-> A #\b)) "(?a)|(?b)") +(test-sre (: #\a ($ #\b) #\c ($ #\d)) "a(b)c(d)") +(test-sre (: bos #\a #\b #\c) "^abc") +(test-sre (: (* nonl) numeric) ".*\\d") +(test-sre (* ($ (: #\a #\b #\c))) "(abc)*") +(test-sre bos "^") +(test-sre (: (? (: #\a #\b)) (: #\a #\b) (: #\a #\b)) "(?:ab)?abab") +(test-sre (: #\a #\b #\c) "abc") +(test-sre ($ (: #\a #\b #\c #\d)) "(abcd)") +(test-sre (: #\a #\b #\c #\d) "abcd") +(test-sre (: #\a ($ #\b) #\c) "a(b)c") +(test-sre (: #\a (or punct #\b)) "a{p|[b]}") +(test-sre (: #\a (or #\b punct)) "a{[b]|p}") +(test-sre (: #\0 #\b #\space #\2 #\8 #\space #\3 #\f #\space #\2 #\d #\space #\7 #\8 #\space #\2 #\9 #\space #\3 #\a) "0b 28 3f 2d 78 29 3a") +(test-sre (or #\a (: ($ #\b) #\c)) "a|(b)c") +(test-sre (: #\e #\f #\g) "efg") +(test-sre (: #\e #\f #\f) "eff") +(test-sre (: #\e #\f #\f #\g) "effg") +(test-sre (: #\a #\a #\a) "aaa") +(test-sre (neg-look-behind (or (:) (: #\! (neg-look-behind (:))))) "(? 
A (:))) "\\[()\\]{65535}(?)") +(test-sre (look-behind (look-ahead (: nonl (look-behind #\x)))) "(?<=(?=.(?<=x)))") +(test-sre eos "$") +(test-sre (: (? #\newline) eos) "\\Z") +(test-sre (: (neg-look-ahead (or #\a #\b)) (* nonl)) "(?![ab]).*") +(test-sre (: #\a #\b #\c #\d) "abcd") +(test-sre (: #\1 #\2 #\3 #\4 #\5 (look-behind (** 1 256 numeric)) #\X) "12345(?<=\\d{1,256})X") +(test-sre (neg-look-behind (** 9 44965 ($ (= 65054 #\space)))) "(? n (: #\f #\o #\o)) (-> n (: #\b #\a #\r))) (backref n)) "(?:(?foo)|(?bar))\\k") +(test-sre (: (? #\a) #\b (* (or #\] #\x #\y)) #\c) "a?b[\\]xy]*c") +(test-sre (* #\f) "f*") +(test-sre (: #\f #\o #\o #\*) "foo\\*") +(test-sre (: #\f #\o #\o #\* #\b #\a #\r) "foo\\*bar") +(test-sre (: #\f #\\ #\o #\o) "f\\\\oo") +(test-sre (or #\t #\e #\n) "[ten]") +(test-sre (: #\t (char-range #\a #\g) #\n) "t[a-g]n") +(test-sre (: #\a #\] #\b) "a\\]b") +(test-sre (: #\a (or #\] #\a #\-) #\b) "a[\\]a\\-]b") +(test-sre (: #\a (or #\] #\-) #\b) "a[\\]\\-]b") +(test-sre (: #\a (or #\] (char-range #\a #\z)) #\b) "a[\\]a-z]b") +(test-sre #\] "\\]") +(test-sre (: #\t (or #\! (char-range #\a #\g)) #\n) "t[!a-g]n") +(test-sre (: #\A (char-range #\+ #\0) #\B) "A[+-0]B") +(test-sre (: #\a (char-range #\- #\0) #\z) "a[\\--0]z") +(test-sre (: #\a (or numeric #\.) #\z) "a[\\d.]z") +(test-sre (: #\A nwb #\\ #\C (~ numeric)) "A\\B\\\\C\\D") +(test-sre (: (* #\a) #\b) "a*b") +(test-sre (: #\< (or #\] #\b #\c) #\>) "<[\\]bc]>") +(test-sre (: #\< (~ (or #\] #\b #\c)) #\>) "<[^\\]bc]>") +(test-sre (: (* #\a) (+ #\b) #\c #\+ (or #\d #\e #\f) ($ (: #\a #\b)) #\( #\c #\d #\)) "a*b+c\\+[def](ab)\\(cd\\)") +(test-sre (: #\h #\o #\w nonl #\t #\o #\space #\h #\o #\w #\. 
#\t #\o) "how.to how\\.to") +(test-sre (: bos #\h #\o #\w #\space #\t #\o #\space #\^ #\h #\o #\w #\space #\t #\o) "^how to \\^how to") +(test-sre (: bos #\b #\( #\c bos #\d #\) #\( bos #\e bos #\f #\)) "^b\\(c^d\\)\\(^e^f\\)") +(test-sre (: #\[ ($ (:)) (= 65535 #\]) ($ (:))) "\\[()\\]{65535}()") +(test-sre (: bos #\A) "^A") +(test-sre (: bos (+ (or alnum #\_))) "^\\w+") +(test-sre (: ($ (+ nonl)) (or bow eow) ($ (+ nonl))) "(.+)\\b(.+)") +(test-sre (+ (~ (or alnum #\_))) "\\W+") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (: #\a nonl #\b) "a.b") +(test-sre (: #\a ($ (= 3 nonl)) #\b) "a(.{3})b") +(test-sre (: #\a ($ (*? nonl)) ($ nonl)) "a(.*?)(.)") +(test-sre (: #\a ($ (*? nonl)) ($ nonl)) "a(.*?)(.)") +(test-sre (: #\a ($ (* nonl)) ($ nonl)) "a(.*)(.)") +(test-sre (: #\a ($ (* nonl)) ($ nonl)) "a(.*)(.)") +(test-sre (: #\a ($ nonl) ($ nonl)) "a(.)(.)") +(test-sre (: #\a ($ nonl) ($ nonl)) "a(.)(.)") +(test-sre (: #\a ($ (? nonl)) ($ nonl)) "a(.?)(.)") +(test-sre (: #\a ($ (? nonl)) ($ nonl)) "a(.?)(.)") +(test-sre (: #\a ($ (?? nonl)) ($ nonl)) "a(.??)(.)") +(test-sre (: #\a ($ (?? nonl)) ($ nonl)) "a(.??)(.)") +(test-sre (: #\a ($ (= 3 nonl)) #\b) "a(.{3})b") +(test-sre (: #\a ($ (>= 3 nonl)) #\b) "a(.{3,})b") +(test-sre (: #\a ($ (**? 3 #f nonl)) #\b) "a(.{3,}?)b") +(test-sre (: #\a ($ (** 3 5 nonl)) #\b) "a(.{3,5})b") +(test-sre (: #\a ($ (**? 3 5 nonl)) #\b) "a(.{3,5}?)b") +(test-sre (: (look-behind (: #\a #\X #\b)) #\c #\d) "(?<=aXb)cd") +(test-sre (: (look-behind ($ nonl)) #\X) "(?<=(.))X") +(test-sre (+ (~ #\a)) "[^a]+") +(test-sre (: bos (= 2 (~ #\a))) "^[^a]{2}") +(test-sre (: bos (>= 2 (~ #\a))) "^[^a]{2,}") +(test-sre (: bos (**? 2 #f (~ #\a))) "^[^a]{2,}?") +(test-sre (+ (~ #\a)) "[^a]+") +(test-sre (: bos (= 2 (~ #\a))) "^[^a]{2}") +(test-sre (: bos (>= 2 (~ #\a))) "^[^a]{2,}") +(test-sre (: bos (**? 
2 #f (~ #\a))) "^[^a]{2,}?") +(test-sre (* (~ numeric)) "\\D*") +(test-sre (* (~ numeric)) "\\D*") +(test-sre (~ numeric) "\\D") +(test-sre (: #\> (~ space)) ">\\S") +(test-sre numeric "\\d") +(test-sre space "\\s") +(test-sre (+ (~ numeric)) "\\D+") +(test-sre (** 2 3 (~ numeric)) "\\D{2,3}") +(test-sre (**? 2 3 (~ numeric)) "\\D{2,3}?") +(test-sre (+ numeric) "\\d+") +(test-sre (** 2 3 numeric) "\\d{2,3}") +(test-sre (**? 2 3 numeric) "\\d{2,3}?") +(test-sre (+ (~ space)) "\\S+") +(test-sre (** 2 3 (~ space)) "\\S{2,3}") +(test-sre (**? 2 3 (~ space)) "\\S{2,3}?") +(test-sre (: #\> (+ space) #\<) ">\\s+<") +(test-sre (: #\> (** 2 3 space) #\<) ">\\s{2,3}<") +(test-sre (: #\> (**? 2 3 space) #\<) ">\\s{2,3}?<") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (** 2 3 (or alnum #\_)) "\\w{2,3}") +(test-sre (**? 2 3 (or alnum #\_)) "\\w{2,3}?") +(test-sre (+ (~ (or alnum #\_))) "\\W+") +(test-sre (** 2 3 (~ (or alnum #\_))) "\\W{2,3}") +(test-sre (**? 2 3 (~ (or alnum #\_))) "\\W{2,3}?") +(test-sre (: bos (* (or #\a #\c)) #\b) "^[ac]*b") +(test-sre (: bos (* (~ #\x)) #\b) "^[^x]*b") +(test-sre (: bos (* (~ #\x)) #\b) "^[^x]*b") +(test-sre (: bos (* numeric) #\b) "^\\d*b") +(test-sre ($ (or (:) #\a)) "(|a)") +(test-sre (: (~ space) (~ space)) "\\S\\S") +(test-sre (= 2 (~ space)) "\\S{2}") +(test-sre (: (~ (or alnum #\_)) (~ (or alnum #\_))) "\\W\\W") +(test-sre (= 2 (~ (or alnum #\_))) "\\W{2}") +(test-sre (~ space) "\\S") +(test-sre (~ numeric) "\\D") +(test-sre (~ (or alnum #\_)) "\\W") +(test-sre (: nonl (~ (or (~ space) #\newline)) nonl) ".[^\\S\n].") +(test-sre (: bos (*? (~ #\d)) eos) "^[^d]*?$") +(test-sre (: bos (*? (~ #\d)) eos) "^[^d]*?$") +(test-sre (: bos (*? 
(~ #\d)) eos) "^[^d]*?$") +(test-sre (* #\A) "A*") +(test-sre nonl ".") +(test-sre (: bos (* numeric) (= 4 (or alnum #\_))) "^\\d*\\w{4}") +(test-sre (: bos (* (~ #\b)) (= 4 (or alnum #\_))) "^[^b]*\\w{4}") +(test-sre (: bos (* (~ #\b)) (= 4 (or alnum #\_))) "^[^b]*\\w{4}") +(test-sre (: bos nonl nwb nonl nwb nonl) "^.\\B.\\B.") +(test-sre (+ (~ numeric)) "\\D+") +(test-sre (: bos (+ (or alnum #\_))) "^\\w+") +(test-sre (: bos (+ numeric)) "^\\d+") +(test-sre (: bos #\> (+ space)) "^>\\s+") +(test-sre (: bos #\A (+ space) #\Z) "^A\\s+Z") +(test-sre (+ (or #\R #\S #\T)) "[RST]+") +(test-sre (+ (char-range #\R #\T)) "[R-T]+") +(test-sre (+ (char-range #\q #\u)) "[q-u]+") +(test-sre (: bos (? #\s) #\c) "^s?c") +(test-sre (char-range #\A #\`) "[A-`]") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (: (or bow eow) (**? 1 #f nonl) (or bow eow)) "\\b.+?\\b") +(test-sre (: #\c #\a #\f nwb (**? 1 #f nonl) nwb) "caf\\B.+?\\B") +(test-sre (: #\c #\3 #\space #\b #\1) "c3 b1") +(test-sre (: bos #\A (+ space) #\Z) "^A\\s+Z") +(test-sre (~ (or alnum #\_)) "\\W") +(test-sre (or alnum #\_) "\\w") +(test-sre (: #\X (** 2 4 #\a) #\b) "Xa{2,4}b") +(test-sre (: #\X (**? 2 4 #\a) #\b) "Xa{2,4}?b") +(test-sre (: #\X (+ (** 2 4 #\a)) #\b) "X(?:a{2,4})+b") +(test-sre (: #\X (** 2 4 numeric) #\b) "X\\d{2,4}b") +(test-sre (: #\X (**? 2 4 numeric) #\b) "X\\d{2,4}?b") +(test-sre (: #\X (+ (** 2 4 numeric)) #\b) "X(?:\\d{2,4})+b") +(test-sre (: #\X (** 2 4 (~ numeric)) #\b) "X\\D{2,4}b") +(test-sre (: #\X (**? 2 4 (~ numeric)) #\b) "X\\D{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (~ numeric))) #\b) "X(?:\\D{2,4})+b") +(test-sre (: #\X (** 2 4 (~ numeric)) #\b) "X\\D{2,4}b") +(test-sre (: #\X (**? 2 4 (~ numeric)) #\b) "X\\D{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (~ numeric))) #\b) "X(?:\\D{2,4})+b") +(test-sre (: #\X (** 2 4 (or #\a #\b #\c)) #\b) "X[abc]{2,4}b") +(test-sre (: #\X (**? 
2 4 (or #\a #\b #\c)) #\b) "X[abc]{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (or #\a #\b #\c))) #\b) "X(?:[abc]{2,4})+b") +(test-sre (: #\X (** 2 4 (~ #\a)) #\b) "X[^a]{2,4}b") +(test-sre (: #\X (**? 2 4 (~ #\a)) #\b) "X[^a]{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (~ #\a))) #\b) "X(?:[^a]{2,4})+b") +(test-sre (: #\X (** 2 4 (~ #\a)) #\b) "X[^a]{2,4}b") +(test-sre (: #\X (**? 2 4 (~ #\a)) #\b) "X[^a]{2,4}?b") +(test-sre (: #\X (+ (** 2 4 (~ #\a))) #\b) "X(?:[^a]{2,4})+b") +(test-sre (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow)) "\\bthe cat\\b") +(test-sre (: #\a #\b #\c (* #\d)) "abcd*") +(test-sre (: #\a #\b #\c (* #\d)) "abcd*") +(test-sre (: #\a #\b #\c (* numeric)) "abc\\d*") +(test-sre (: #\a #\b #\c (* (or #\d #\e))) "abc[de]*") +(test-sre (: #\X (= 3 (~ (or alnum #\_))) #\X) "X\\W{3}X") +(test-sre (: #\f (* nonl)) "f.*") +(test-sre (: #\f (* nonl)) "f.*") +(test-sre (: #\f (* nonl)) "f.*") +(test-sre (: #\f (* nonl)) "f.*") +(test-sre (: (neg-look-behind bos) #\E #\T #\A) "(? A (: #\s #\s)) (-> A (: #\k #\k))) #\space (backref A)) "(?:(?ss)|(?kk)) \\k") +(test-sre (: (or (-> A #\s) (-> A #\k)) #\space (>= 3 (backref A)) #\!) "(?:(?s)|(?k)) \\k{3,}!") +(test-sre #\i "i") +(test-sre #\I "I") +(test-sre #\i "i") +(test-sre (~ #\i) "[^i]") +(test-sre (or #\z #\i) "[zi]") +(test-sre (or #\i #\I) "[iI]") +(test-sre (+ numeric) "\\d+") +(test-sre (+ numeric) "\\d+") +(test-sre (: #\> (+ space) #\<) ">\\s+<") +(test-sre (: #\> (+ space) #\<) ">\\s+<") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (: (or bow eow) #\A #\B #\C (or bow eow)) "\\bABC\\b") +(test-sre (: (or bow eow) #\A #\B #\C (or bow eow)) "\\bABC\\b") +(test-sre (neg-look-behind ($ (or (:) (: #\l #\space)))) "(?= 2 #\b) #\x (** 4 5 #\y) #\p (** 0 6 #\q) #\A (>= 0 #\B) #\z #\z) "a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB*zz") +(test-sre (: bos (** 1 2 ($ (: #\a #\b #\c))) #\z #\z) "^(abc){1,2}zz") +(test-sre (: bos (**? 1 2 ($ (or (**? 
1 #f #\b) #\a))) #\c) "^(b+?|a){1,2}?c") +(test-sre (: bos (** 1 2 ($ (or (+ #\b) #\a))) #\c) "^(b+|a){1,2}c") +(test-sre (: bos (**? 1 2 ($ (or (+ #\b) #\a))) #\b #\c) "^(b+|a){1,2}?bc") +(test-sre (: bos (**? 1 2 ($ (or (* #\b) (: #\b #\a)))) #\b #\c) "^(b*|ba){1,2}?bc") +(test-sre (: bos (**? 1 2 ($ (or (: #\b #\a) (* #\b)))) #\b #\c) "^(ba|b*){1,2}?bc") +(test-sre (: bos (or #\a #\b #\] #\c #\d #\e)) "^[ab\\]cde]") +(test-sre (: bos (or #\] #\c #\d #\e)) "^[\\]cde]") +(test-sre (: bos (~ (or #\a #\b #\] #\c #\d #\e))) "^[^ab\\]cde]") +(test-sre (: bos (~ (or #\] #\c #\d #\e))) "^[^\\]cde]") +(test-sre (: bos #\@) "^@") +(test-sre (: bos (+ (char-range #\0 #\9)) eos) "^[0-9]+$") +(test-sre (: bos (* nonl) #\n #\t #\e #\r) "^.*nter") +(test-sre (: bos #\x #\x #\x (+ (char-range #\0 #\9)) eos) "^xxx[0-9]+$") +(test-sre (: bos (+ nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos) "^.+[0-9][0-9][0-9]$") +(test-sre (: bos (**? 1 #f nonl) (char-range #\0 #\9) (char-range #\0 #\9) (char-range #\0 #\9) eos) "^.+?[0-9][0-9][0-9]$") +(test-sre (: bos ($ (+ (~ #\!))) #\! ($ (+ nonl)) #\= #\a #\p #\q #\u #\x #\z #\. #\i #\x #\r #\. #\z #\z #\z #\. #\a #\c #\. #\u #\k eos) "^([^!]+)!(.+)=apquxz\\.ixr\\.zzz\\.ac\\.uk$") +(test-sre #\: ":") +(test-sre (: bos (* nonl) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) #\. ($ (** 1 3 numeric)) eos) "^.*\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$") +(test-sre (: bos ($ (+ numeric)) (+ space) #\I #\N (+ space) #\S #\O #\A (+ space) ($ (+ (~ space))) (+ space) ($ (+ (~ space))) (* space) #\( (* space) eos) "^(\\d+)\\s+IN\\s+SOA\\s+(\\S+)\\s+(\\S+)\\s*\\(\\s*$") +(test-sre (: bos (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-)) (* ($ (: #\. (or (char-range #\a #\z) (char-range #\A #\Z) numeric) (* (or (char-range #\a #\z) (char-range #\A #\Z) numeric #\-))))) #\. 
eos) "^[a-zA-Z\\d][a-zA-Z\\d\\-]*(\\.[a-zA-Z\\d][a-zA-Z\\d\\-]*)*\\.$") +(test-sre (: bos #\* #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric))))) (* ($ (: #\. (char-range #\a #\z) (? ($ (: (* (or (char-range #\a #\z) #\- numeric)) (+ (or (char-range #\a #\z) numeric)))))))) eos) "^\\*\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?(\\.[a-z]([a-z\\-\\d]*[a-z\\d]+)?)*$") +(test-sre (: bos (look-ahead (: #\a #\b ($ (: #\d #\e)))) ($ (: #\a #\b #\d)) ($ #\e)) "^(?=ab(de))(abd)(e)") +(test-sre (: bos (neg-look-ahead (or (: ($ (: #\a #\b)) #\d #\e) #\x)) ($ (: #\a #\b #\d)) ($ #\f)) "^(?!(ab)de|x)(abd)(f)") +(test-sre (: bos (look-ahead ($ (: #\a #\b ($ (: #\c #\d))))) ($ (: #\a #\b))) "^(?=(ab(cd)))(ab)") +(test-sre (: bos eos) "^$") +(test-sre (: bos #\a #\space #\b (or #\c #\space) #\d eos) "^a b[c ]d$") +(test-sre (: bos ($ (: #\a ($ (: #\b ($ #\c))))) ($ (: #\d ($ (: #\e ($ #\f))))) ($ (: #\h ($ (: #\i ($ #\j))))) ($ (: #\k ($ (: #\l ($ #\m))))) eos) "^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$") +(test-sre (: bos (: #\a ($ (: #\b ($ #\c)))) (: #\d ($ (: #\e ($ #\f)))) (: #\h ($ (: #\i ($ #\j)))) (: #\k ($ (: #\l ($ #\m)))) eos) "^a(b(c))d(e(f))h(i(j))k(l(m))$") +(test-sre (: bos (+ (or #\. #\^ #\$ #\| #\( #\) #\* #\+ #\? #\{ #\, #\}))) "^[.\\^$|()*+?{,}]+") +(test-sre (: bos (* #\a) (or alnum #\_)) "^a*\\w") +(test-sre (: bos (*? #\a) (or alnum #\_)) "^a*?\\w") +(test-sre (: bos (+ #\a) (or alnum #\_)) "^a+\\w") +(test-sre (: bos (**? 1 #f #\a) (or alnum #\_)) "^a+?\\w") +(test-sre (: bos (= 8 numeric) (>= 2 (or alnum #\_))) "^\\d{8}\\w{2,}") +(test-sre (: bos (** 4 5 (or #\a #\e #\i #\o #\u numeric)) eos) "^[aeiou\\d]{4,5}$") +(test-sre (: bos (**? 
4 5 (or #\a #\e #\i #\o #\u numeric))) "^[aeiou\\d]{4,5}?") +(test-sre (: bos #\1 #\2 nonl #\3 #\4) "^12.34") +(test-sre (: #\f #\o #\o (neg-look-ahead (: #\b #\a #\r)) ($ (* nonl))) "foo(?!bar)(.*)") +(test-sre (: (or (: (neg-look-ahead (: #\f #\o #\o)) nonl nonl nonl) (: bos (** 0 2 nonl))) #\b #\a #\r ($ (* nonl))) "(?:(?!foo)...|^.{0,2})bar(.*)") +(test-sre (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3))) "^(\\D*)(?=\\d)(?!123)") +(test-sre (: (neg-look-ahead bos) #\a #\b #\c) "{!= 8 numeric) #\@ (+ nonl) (~ #\k) eos) "^\\d{8,}@.+[^k]$") +(test-sre (~ #\a) "[^a]") +(test-sre (~ (or #\a #\z)) "[^az]") +(test-sre (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 6 (~ #\*)) #\L #\L) "P[^*]TAIRE[^*]{1,6}?LL") +(test-sre (: #\P (~ #\*) #\T #\A #\I #\R #\E (**? 1 #f (~ #\*)) #\L #\L) "P[^*]TAIRE[^*]+?LL") +(test-sre (: ($ (: #\. numeric numeric (? (char-range #\1 #\9)))) (+ numeric)) "(\\.\\d\\d[1-9]?)\\d+") +(test-sre ($ (: #\. numeric numeric ($ (or (look-ahead #\0) (: numeric (look-ahead numeric)))))) "(\\.\\d\\d((?=0)|\\d(?=\\d)))") +(test-sre (: #\f #\o #\o ($ (* nonl)) #\b #\a #\r) "foo(.*)bar") +(test-sre (: #\f #\o #\o ($ (*? nonl)) #\b #\a #\r) "foo(.*?)bar") +(test-sre (: ($ (* nonl)) ($ (+ numeric))) "(.*)(\\d+)") +(test-sre (: ($ (*? nonl)) ($ (+ numeric))) "(.*?)(\\d+)") +(test-sre (: ($ (* nonl)) ($ (+ numeric)) eos) "(.*)(\\d+)$") +(test-sre (: ($ (*? 
nonl)) ($ (+ numeric)) eos) "(.*?)(\\d+)$") +(test-sre (: ($ (* nonl)) (or bow eow) ($ (+ numeric)) eos) "(.*)\\b(\\d+)$") +(test-sre (: ($ (: (* nonl) (~ numeric))) ($ (+ numeric)) eos) "(.*\\D)(\\d+)$") +(test-sre (: bos (* (~ numeric)) (neg-look-ahead (: #\1 #\2 #\3))) "^\\D*(?!123)") +(test-sre (: bos ($ (* (~ numeric))) (look-ahead numeric) (neg-look-ahead (: #\1 #\2 #\3))) "^(\\D*)(?=\\d)(?!123)") +(test-sre (: bos (or (char-range #\W #\]) #\4 #\6)) "^[W-\\]46]") +(test-sre (: #\w #\o #\r #\d #\space (** 0 10 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d) "word (?:[a-zA-Z0-9]+ ){0,10}otherword") +(test-sre (: #\w #\o #\r #\d #\space (** 0 300 (: (+ (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9))) #\space)) #\o #\t #\h #\e #\r #\w #\o #\r #\d) "word (?:[a-zA-Z0-9]+ ){0,300}otherword") +(test-sre (: bos (= 0 ($ #\a))) "^(a){0}") +(test-sre (: bos (** 0 1 ($ #\a))) "^(a)?") +(test-sre (: bos (** 0 2 ($ #\a))) "^(a){0,2}") +(test-sre (: bos (** 0 3 ($ #\a))) "^(a){0,3}") +(test-sre (: bos (>= 0 ($ #\a))) "^(a)*") +(test-sre (: bos (= 1 ($ #\a))) "^(a)") +(test-sre (: bos (** 1 2 ($ #\a))) "^(a){1,2}") +(test-sre (: bos (** 1 3 ($ #\a))) "^(a){1,3}") +(test-sre (: bos (>= 1 ($ #\a))) "^(a)+") +(test-sre (: (* nonl) #\. #\g #\i #\f) ".*\\.gif") +(test-sre (: (>= 0 nonl) #\. #\g #\i #\f) ".*\\.gif") +(test-sre (: (* nonl) eos) ".*$") +(test-sre ($ (or (: (* nonl) #\X) (: bos #\B))) "(.*X|^B)") +(test-sre (: bos (* nonl) #\B) "^.*B") +(test-sre (: bol (* nonl) #\B) "{= 0 #\b) #\b #\c) "ab*bc") +(test-sre (: #\a (+ #\b) #\b #\c) "ab+bc") +(test-sre (: #\a (+ #\b) #\b #\c) "ab+bc") +(test-sre (: #\a (>= 1 #\b) #\b #\c) "ab+bc") +(test-sre (: #\a (** 1 3 #\b) #\b #\c) "ab{1,3}bc") +(test-sre (: #\a (** 3 4 #\b) #\b #\c) "ab{3,4}bc") +(test-sre (: #\a (** 4 5 #\b) #\b #\c) "ab{4,5}bc") +(test-sre (: #\a (? 
#\b) #\b #\c) "ab?bc") +(test-sre (: #\a (** 0 1 #\b) #\b #\c) "ab?bc") +(test-sre (: #\a (? #\b) #\b #\c) "ab?bc") +(test-sre (: #\a (? #\b) #\c) "ab?c") +(test-sre (: #\a (** 0 1 #\b) #\c) "ab?c") +(test-sre (: bos #\a #\b #\c eos) "^abc$") +(test-sre (: bos #\a #\b #\c) "^abc") +(test-sre (: bos #\a #\b #\c eos) "^abc$") +(test-sre (: #\a #\b #\c eos) "abc$") +(test-sre bos "^") +(test-sre eos "$") +(test-sre (: #\a nonl #\c) "a.c") +(test-sre (: #\a (* nonl) #\c) "a.*c") +(test-sre (: #\a (or #\b #\c) #\d) "a[bc]d") +(test-sre (: #\a (char-range #\b #\d) #\e) "a[b-d]e") +(test-sre (: #\a (char-range #\b #\d)) "a[b-d]") +(test-sre (: #\a (or #\- #\b)) "a[\\-b]") +(test-sre (: #\a (or #\b #\-)) "a[b\\-]") +(test-sre (: #\a #\] #\b) "a\\]b") +(test-sre (: #\a (~ (or #\b #\c)) #\d) "a[^bc]d") +(test-sre (: #\a (~ (or #\- #\b)) #\c) "a[^\\-b]c") +(test-sre (: #\a (~ (or #\] #\b)) #\c) "a[^\\]b]c") +(test-sre (: (or bow eow) #\a (or bow eow)) "\\ba\\b") +(test-sre (: (or bow eow) #\y (or bow eow)) "\\by\\b") +(test-sre (: nwb #\a nwb) "\\Ba\\B") +(test-sre (: nwb #\y (or bow eow)) "\\By\\b") +(test-sre (: (or bow eow) #\y nwb) "\\by\\B") +(test-sre (: nwb #\y nwb) "\\By\\B") +(test-sre (or alnum #\_) "\\w") +(test-sre (~ (or alnum #\_)) "\\W") +(test-sre (: #\a space #\b) "a\\sb") +(test-sre (: #\a (~ space) #\b) "a\\Sb") +(test-sre numeric "\\d") +(test-sre (~ numeric) "\\D") +(test-sre (or (: #\a #\b) (: #\c #\d)) "ab|cd") +(test-sre (: ($ (:)) #\e #\f) "()ef") +(test-sre (: eos #\b) "$b") +(test-sre (: #\a #\( #\b) "a\\(b") +(test-sre (: #\a (* #\() #\b) "a\\(*b") +(test-sre (: #\a #\\ #\b) "a\\\\b") +(test-sre ($ ($ #\a)) "((a))") +(test-sre (: ($ #\a) #\b ($ #\c)) "(a)b(c)") +(test-sre (: (+ #\a) (+ #\b) #\c) "a+b+c") +(test-sre (: (>= 1 #\a) (>= 1 #\b) #\c) "a+b+c") +(test-sre (: #\a (**? 
1 #f nonl) #\c) "a.+?c") +(test-sre (* ($ (or (+ #\a) #\b))) "(a+|b)*") +(test-sre (>= 0 ($ (or (+ #\a) #\b))) "(a+|b)*") +(test-sre (+ ($ (or (+ #\a) #\b))) "(a+|b)+") +(test-sre (>= 1 ($ (or (+ #\a) #\b))) "(a+|b)+") +(test-sre (? ($ (or (+ #\a) #\b))) "(a+|b)?") +(test-sre (** 0 1 ($ (or (+ #\a) #\b))) "(a+|b)?") +(test-sre (* (~ (or #\a #\b))) "[^ab]*") +(test-sre (: #\a #\b #\c) "abc") +(test-sre (* #\a) "a*") +(test-sre (: (* ($ (or #\a #\b #\c))) #\d) "([abc])*d") +(test-sre (: (* ($ (or #\a #\b #\c))) #\b #\c #\d) "([abc])*bcd") +(test-sre (or #\a #\b #\c #\d #\e) "[abcde]") +(test-sre (: ($ (or #\a #\b #\c #\d #\e)) #\f) "([abcde])f") +(test-sre (: #\a #\b #\c (* #\d) #\e #\f #\g) "abcd*efg") +(test-sre (: #\a (* #\b)) "ab*") +(test-sre (: ($ (or (: #\a #\b) (: #\c #\d))) #\e) "(ab|cd)e") +(test-sre (: (or #\a #\b #\h #\g #\e #\f #\d #\c) #\i #\j) "[abhgefdc]ij") +(test-sre (: bos ($ (or (: #\a #\b) (: #\c #\d))) #\e) "^(ab|cd)e") +(test-sre (: ($ (or (: #\a #\b #\c) (:))) #\e #\f) "(abc|)ef") +(test-sre (: ($ (or #\a #\b)) (* #\c) #\d) "([ab])c*d") +(test-sre (: ($ (or (: #\a #\b) (: #\a (* #\b)))) #\b #\c) "(ab|ab*)bc") +(test-sre (: #\a ($ (* (or #\b #\c))) (* #\c)) "a([bc]*)c*") +(test-sre (: #\a ($ (* (or #\b #\c))) ($ (: (* #\c) #\d))) "a([bc]*)(c*d)") +(test-sre (: #\a ($ (+ (or #\b #\c))) ($ (: (* #\c) #\d))) "a([bc]+)(c*d)") +(test-sre (: #\a ($ (* (or #\b #\c))) ($ (: (+ #\c) #\d))) "a([bc]*)(c+d)") +(test-sre (: #\a (* (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e) "a[bcd]*dcdcde") +(test-sre (: #\a (+ (or #\b #\c #\d)) #\d #\c #\d #\c #\d #\e) "a[bcd]+dcdcde") +(test-sre (: ($ (or (: #\a #\b) #\a)) (* #\b) #\c) "(ab|a)b*c") +(test-sre (: ($ (: ($ #\a) ($ #\b) #\c)) ($ #\d)) "((a)(b)c)(d)") +(test-sre (: (or (char-range #\a #\z) (char-range #\A #\Z) #\_) (* (or (char-range #\a #\z) (char-range #\A #\Z) (char-range #\0 #\9) #\_))) "[a-zA-Z_][a-zA-Z0-9_]*") +(test-sre (or (: bos #\a ($ (or (: #\b (+ #\c)) (: #\b (or #\e #\h)))) #\g) (: nonl #\h eos)) 
"^a(bc+|b[eh])g|.h$") +(test-sre ($ (or (: #\b (+ #\c) #\d eos) (: #\e (* #\f) #\g nonl) (: (? #\h) #\i ($ (or #\j #\k))))) "(bc+d$|ef*g.|h?i([jk]))") +(test-sre ($ ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a)))))))))) "((((((((((a))))))))))") +(test-sre ($ ($ ($ ($ ($ ($ ($ ($ ($ #\a))))))))) "(((((((((a)))))))))") +(test-sre (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s #\space #\o #\f #\space #\t #\e #\x #\t) "multiple words of text") +(test-sre (: #\m #\u #\l #\t #\i #\p #\l #\e #\space #\w #\o #\r #\d #\s) "multiple words") +(test-sre (: ($ (* nonl)) #\c ($ (* nonl))) "(.*)c(.*)") +(test-sre (: #\( ($ (* nonl)) #\, #\space ($ (* nonl)) #\)) "\\((.*), (.*)\\)") +(test-sre #\k "k") +(test-sre (: #\a #\b #\c #\d) "abcd") +(test-sre (: #\a ($ (: #\b #\c)) #\d) "a(bc)d") +(test-sre (: #\a (? #\-) #\c) "a-?c") +(test-sre (: #\a (neg-look-ahead #\b) nonl) "a(?!b).") +(test-sre (: #\a (look-ahead #\d) nonl) "a(?=d).") +(test-sre (: #\a (look-ahead (or #\c #\d)) nonl) "a(?=[cd]).") +(test-sre (: #\a (or #\b #\c #\d) ($ nonl)) "a[bcd](.)") +(test-sre (: #\a (* (or #\b #\c #\d)) ($ nonl)) "a[bcd]*(.)") +(test-sre (: #\a (**? 1 #f (or #\b #\c #\d)) ($ nonl)) "a[bcd]+?(.)") +(test-sre (: #\a (+ (or #\b #\c #\d)) ($ nonl)) "a[bcd]+(.)") +(test-sre (: #\a (= 2 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{2}(.)") +(test-sre (: #\a (** 4 5 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{4,5}(.)") +(test-sre (: #\a (**? 4 5 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{4,5}?(.)") +(test-sre (* ($ (or ($ (: #\f #\o #\o)) ($ (: #\b #\a #\r))))) "((foo)|(bar))*") +(test-sre (: #\a (** 6 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{6,7}(.)") +(test-sre (: #\a (**? 6 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{6,7}?(.)") +(test-sre (: #\a (** 5 6 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,6}(.)") +(test-sre (: #\a (**? 5 6 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,6}?(.)") +(test-sre (: #\a (** 5 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,7}(.)") +(test-sre (: #\a (**? 
5 7 (or #\b #\c #\d)) ($ nonl)) "a[bcd]{5,7}?(.)") +(test-sre (: #\a (**? 1 #f (or #\b (**? 1 2 ($ (or #\c #\e))) #\d)) ($ nonl)) "a(?:b|([ce]){1,2}?|d)+?(.)") +(test-sre (: bos (? ($ (+ nonl))) #\B) "^(.+)?B") +(test-sre (or (: bos ($ (~ (char-range #\a #\z)))) (: ($ #\^) eos)) "^([^a-z])|(\\^)$") +(test-sre (: bos (or #\< #\>) #\&) "^[<>]&") +(test-sre (* (or (: ($ #\f) ($ #\o) ($ #\o)) (: ($ #\b) ($ #\a) ($ #\r)))) "(?:(f)(o)(o)|(b)(a)(r))*") +(test-sre (: (look-behind #\a) #\b) "(?<=a)b") +(test-sre (: (neg-look-behind #\c) #\b) "(? (+ #\a))) #\a #\b) "(>a+)ab") +(test-sre (: #\a eos) "a$") +(test-sre (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)))) #\f #\o #\o) "(?<=\\d{3}(?!999))foo") +(test-sre (: (look-behind (: (neg-look-ahead (: nonl nonl nonl #\9 #\9 #\9)) (= 3 numeric))) #\f #\o #\o) "(?<=(?!...999)\\d{3})foo") +(test-sre (: (look-behind (: (= 3 numeric) (neg-look-ahead (: #\9 #\9 #\9)) nonl nonl nonl)) #\f #\o #\o) "(?<=\\d{3}(?!999)...)foo") +(test-sre (: (look-behind (: (= 3 numeric) nonl nonl nonl)) (neg-look-behind (: #\9 #\9 #\9)) #\f #\o #\o) "(?<=\\d{3}...)(?= 2 (or #\a #\b)) "[ab]{2,}") +(test-sre (**? 2 #f (or #\a #\b)) "[ab]{2,}?") +(test-sre (: #\a #\b #\c (look-ahead (: #\x #\y #\z))) "abc(?=xyz)") +(test-sre (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z))) "(?<=pqr)abc(?=xyz)") +(test-sre (: #\a (or bow eow)) "a\\b") +(test-sre (: #\a #\b #\c (look-ahead (: #\a #\b #\c #\d #\e)) (look-ahead (: #\a #\b))) "abc(?=abcde)(?=ab)") +(test-sre (: (*? #\a) (*? #\b)) "a*?b*?") +(test-sre (or (: ($ #\a) ($ #\b)) ($ #\c)) "(a)(b)|(c)") +(test-sre (-> A (: #\a #\a)) "(?aa)") +(test-sre (: #\a ($ #\b) #\c ($ #\d)) "a(b)c(d)") +(test-sre bos "^") +(test-sre (: (? 
($ (: #\0 #\2 #\-))) (= 3 (char-range #\0 #\9)) #\- (= 3 (char-range #\0 #\9))) "(02-)?[0-9]{3}-[0-9]{3}") +(test-sre (: #\a #\b #\c) "abc") +(test-sre (: #\a #\b #\c) "abc") +(test-sre (or (: #\a #\b #\c) (: #\b #\c #\d)) "abc|bcd") +(test-sre (look-behind (or (: #\a #\b #\c) (:))) "(?<=abc|)") +(test-sre (look-behind (or (: #\a #\b #\c) (:))) "(?<=abc|)") +(test-sre (look-behind (or (:) (: #\a #\b #\c))) "(?<=|abc)") +(test-sre (or #\a #\b #\c) "[abc]") +(test-sre (: #\f #\o #\o #\b #\a #\r) "foobar") +(test-sre (: #\f #\o #\o #\b #\a #\r) "foobar") +(test-sre (: (look-behind (: #\p #\q #\r)) #\a #\b #\c (look-ahead (: #\x #\y #\z))) "(?<=pqr)abc(?=xyz)") +(test-sre eos "$") +(test-sre (: (? #\newline) eos) "\\Z") +(test-sre (look-behind (look-ahead (: nonl (look-behind #\x)))) "(?<=(?=.(?<=x)))") +(test-sre (: (neg-look-ahead (or #\a #\b)) (* nonl)) "(?![ab]).*") +(test-sre (or #\a #\[) "[a\\[]") +(test-sre (: (or bow eow) #\X) "\\bX") +(test-sre (: nwb #\X) "\\BX") +(test-sre (: #\X (or bow eow)) "X\\b") +(test-sre (: #\X nwb) "X\\B") +(test-sre (~ #\a) "[^a]") +(test-sre (: #\a nonl #\b) "a.b") +(test-sre (: #\a ($ (= 3 nonl)) #\b) "a(.{3})b") +(test-sre (: #\a ($ (*? nonl)) ($ nonl)) "a(.*?)(.)") +(test-sre (: #\a ($ (*? nonl)) ($ nonl)) "a(.*?)(.)") +(test-sre (: #\a ($ (* nonl)) ($ nonl)) "a(.*)(.)") +(test-sre (: #\a ($ (* nonl)) ($ nonl)) "a(.*)(.)") +(test-sre (: #\a ($ nonl) ($ nonl)) "a(.)(.)") +(test-sre (: #\a ($ nonl) ($ nonl)) "a(.)(.)") +(test-sre (: #\a ($ (? nonl)) ($ nonl)) "a(.?)(.)") +(test-sre (: #\a ($ (? nonl)) ($ nonl)) "a(.?)(.)") +(test-sre (: #\a ($ (?? nonl)) ($ nonl)) "a(.??)(.)") +(test-sre (: #\a ($ (?? nonl)) ($ nonl)) "a(.??)(.)") +(test-sre (: #\a ($ (= 3 nonl)) #\b) "a(.{3})b") +(test-sre (: #\a ($ (>= 3 nonl)) #\b) "a(.{3,})b") +(test-sre (: #\a ($ (**? 3 #f nonl)) #\b) "a(.{3,}?)b") +(test-sre (: #\a ($ (** 3 5 nonl)) #\b) "a(.{3,5})b") +(test-sre (: #\a ($ (**? 
3 5 nonl)) #\b) "a(.{3,5}?)b") +(test-sre (: (look-behind (: #\a #\X #\b)) #\c #\d) "(?<=aXb)cd") +(test-sre (: (look-behind ($ nonl)) #\X) "(?<=(.))X") +(test-sre (+ (~ #\a)) "[^a]+") +(test-sre (: bos (= 2 (~ #\a))) "^[^a]{2}") +(test-sre (: bos (>= 2 (~ #\a))) "^[^a]{2,}") +(test-sre (: bos (**? 2 #f (~ #\a))) "^[^a]{2,}?") +(test-sre (+ (~ #\a)) "[^a]+") +(test-sre (: bos (= 2 (~ #\a))) "^[^a]{2}") +(test-sre (: bos (>= 2 (~ #\a))) "^[^a]{2,}") +(test-sre (: bos (**? 2 #f (~ #\a))) "^[^a]{2,}?") +(test-sre (~ numeric) "\\D") +(test-sre (: #\> (~ space)) ">\\S") +(test-sre numeric "\\d") +(test-sre space "\\s") +(test-sre (+ (~ numeric)) "\\D+") +(test-sre (** 2 3 (~ numeric)) "\\D{2,3}") +(test-sre (**? 2 3 (~ numeric)) "\\D{2,3}?") +(test-sre (+ numeric) "\\d+") +(test-sre (** 2 3 numeric) "\\d{2,3}") +(test-sre (**? 2 3 numeric) "\\d{2,3}?") +(test-sre (+ (~ space)) "\\S+") +(test-sre (** 2 3 (~ space)) "\\S{2,3}") +(test-sre (**? 2 3 (~ space)) "\\S{2,3}?") +(test-sre (: #\> (+ space) #\<) ">\\s+<") +(test-sre (: #\> (** 2 3 space) #\<) ">\\s{2,3}<") +(test-sre (: #\> (**? 2 3 space) #\<) ">\\s{2,3}?<") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (** 2 3 (or alnum #\_)) "\\w{2,3}") +(test-sre (**? 2 3 (or alnum #\_)) "\\w{2,3}?") +(test-sre (+ (~ (or alnum #\_))) "\\W+") +(test-sre (** 2 3 (~ (or alnum #\_))) "\\W{2,3}") +(test-sre (**? 
2 3 (~ (or alnum #\_))) "\\W{2,3}?") +(test-sre (: bos (* (or #\a #\c)) #\b) "^[ac]*b") +(test-sre (: bos (* (~ #\x)) #\b) "^[^x]*b") +(test-sre (: bos (* (~ #\x)) #\b) "^[^x]*b") +(test-sre (: bos (* numeric) #\b) "^\\d*b") +(test-sre ($ (or (:) #\a)) "(|a)") +(test-sre (: #\a #\b #\c (* #\d)) "abcd*") +(test-sre (: #\a #\b #\c (* #\d)) "abcd*") +(test-sre (: #\a #\b #\c (* numeric)) "abc\\d*") +(test-sre (: #\a #\b #\c (* (or #\d #\e))) "abc[de]*") +(test-sre (: (or bow eow) #\t #\h #\e #\space #\c #\a #\t (or bow eow)) "\\bthe cat\\b") +(test-sre (: (or bow eow) nonl nonl nonl nwb) "\\b...\\B") +(test-sre (: (or bow eow) nonl nonl nonl nwb) "\\b...\\B") +(test-sre (: (or bow eow) nonl nonl nonl nwb) "\\b...\\B") +(test-sre (: #\i #\s #\t) "ist") +(test-sre (: #\i (+ #\s) #\t) "is+t") +(test-sre (: #\i (**? 1 #f #\s) #\t) "is+?t") +(test-sre (: #\i (? #\s) #\t) "is?t") +(test-sre (: #\i (= 2 #\s) #\t) "is{2}t") +(test-sre (: bos #\A (+ space) #\Z) "^A\\s+Z") +(test-sre (: #\A #\s #\k #\Z) "AskZ") +(test-sre (+ (or #\A #\s #\k #\Z)) "[AskZ]+") +(test-sre (+ (~ #\s)) "[^s]+") +(test-sre (+ (~ #\s)) "[^s]+") +(test-sre (+ (~ #\k)) "[^k]+") +(test-sre (+ (~ #\k)) "[^k]+") +(test-sre (+ (~ (or #\s #\k))) "[^sk]+") +(test-sre (+ (~ (or #\s #\k))) "[^sk]+") +(test-sre #\i "i") +(test-sre #\I "I") +(test-sre (or #\z #\i) "[zi]") +(test-sre (or #\i #\I) "[iI]") +(test-sre (+ numeric) "\\d+") +(test-sre (+ numeric) "\\d+") +(test-sre (: #\> (+ space) #\<) ">\\s+<") +(test-sre (: #\> (+ space) #\<) ">\\s+<") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (+ (or alnum #\_)) "\\w+") +(test-sre (: (or bow eow) (**? 1 #f nonl) (or bow eow)) "\\b.+?\\b") +(test-sre (: #\c #\a #\f nwb (**? 
1 #f nonl) nwb) "caf\\B.+?\\B") +(test-sre (+ (** 1 3 #\x)) "(?:x{1,3})+") +(test-sre (~ #\a) "[^a]") +(test-sre (: (look-behind (: #\C #\newline)) bos) "(?<=C\n)^") +(test-sre (: (+ (or alnum #\_)) (look-ahead #\tab)) "\\w+(?=\t)") +(test-sre (w/nocase (: #\A #\space #\s #\t #\r #\i #\n #\g)) "(?i)A string") +(test-sre (w/nocase (: ($ (* (~ #\.))) #\. ($ (* (~ #\:))) #\: (+ (or #\T #\space)) ($ (* nonl)))) "(?i)([^.]*)\\.([^:]*):[T ]+(.*)") +(test-sre (w/nocase (: bos (+ (char-range #\W #\c)) eos)) "(?i)^[W-c]+$") +(test-sre (w/nocase (~ #\a)) "(?i)[^a]") +(test-sre (: (w/nocase (or (: #\s #\a #\t #\u #\r #\d #\a #\y) (: #\s #\u #\n #\d #\a #\y))) #\:) "(?i:saturday|sunday):") +(test-sre (: (w/nocase #\a) #\b) "(?i:a)b") +(test-sre (w/nocase (: ($ (w/nocase #\a)) #\b)) "(?i)((?i:a))b") +(test-sre (w/nocase (: (w/case #\a) #\b)) "(?i)(?-i:a)b") +(test-sre (w/nocase (: ($ (w/case #\a)) #\b)) "(?i)((?-i:a))b") +(test-sre (w/nocase (: (w/case #\a) #\b)) "(?i)(?-i:a)b") +(test-sre (w/nocase (: (? ($ (: (+ (or (or alnum #\_) #\:))) #\: #\:))) ($ (+ (or alnum #\_))) eos) "(?i)([\\w:]+::)?(\\w+)$") +(test-sre (w/nocase (: #\A #\B (w/case #\C))) "(?i)AB(?-i:C)") +(test-sre (w/nocase (: #\r #\e #\g (or (: #\u #\l (or #\a #\@ (: #\a #\e)) #\r) (: #\e #\x)))) "(?i)reg(?:ul(?:[a@]|ae)r|ex)") +(test-sre (w/nocase (* (~ #\a))) "(?i)[^a]*") +(test-sre (w/nocase (: (*? (~ #\a)) #\X)) "(?i)[^a]*?X") +(test-sre (w/nocase (: (**? 1 #f (~ #\a)) #\X)) "(?i)[^a]+?X") +(test-sre (w/nocase (: (? (~ #\a)) #\X)) "(?i)[^a]?X") +(test-sre (w/nocase (: (?? (~ #\a)) #\X)) "(?i)[^a]??X") +(test-sre (w/nocase (** 2 3 (~ #\a))) "(?i)[^a]{2,3}") +(test-sre (w/nocase (**? 2 3 (~ #\a))) "(?i)[^a]{2,3}?") +(test-sre (w/nocase (: (look-behind (= 2 #\a)) #\b)) "(?i)(?<=a{2})b") +(test-sre (w/nocase (: (neg-look-behind (= 2 #\a)) #\b)) "(?i)(?symbol "|")) ; r7rs: '|\|| +(define arrsym (string->symbol "->")) ; r6rs/r7rs: '-> + (define-syntax receive (syntax-rules () ((receive formals expression body ...) 
@@ -26,7 +28,7 @@ (- (char->integer c) (char->integer #\0))) (define (fail s msg . args) - (raise (list 'string-sre->sre s msg args))) + (raise (list 'ssre->sre s msg args))) (define opar #\() (define cpar #\)) @@ -51,7 +53,14 @@ ((eqv? (car p) (car s)) (prefix? (cdr p) (cdr s))) (else #f))) +(define (sre=? s1 s2) + (or (eqv? s1 s2) + ; (and (char-set? s1) (char-set? s2) (char-set= s1 s2)) ; requires srfi 14 + (and (string? s1) (string? s2) (string=? s1 s2)) + (and (pair? s1) (pair? s2) (sre=? (car s1) (car s2)) (sre=? (cdr s1) (cdr s2))))) + ; Option flags are symbols from the {i, m, s, x, n, u} set +; For convenience, they are combined with definitions in a single env (define o-set? memq) (define (o-add f o) @@ -64,6 +73,10 @@ (cond ((null? o) #f) ((and (pair? (car o)) (eq? n (caar o))) (car o)) (else (o-lookup n (cdr o))))) +(define (o-reverse-lookup sre o) + (cond ((null? o) #f) + ((and (pair? (car o)) (sre=? sre (caddar o))) (car o)) + (else (o-reverse-lookup sre (cdr o))))) (define (o-skip s o) (if (o-set? 'x o) (skip s) s)) (define (o-wrappers o0 o1) ;=> (w/x w/y ...) @@ -78,9 +91,19 @@ (loop (o-del 'i o) (cons 'w/nocase wl))) (else wl)))) +; NB: if your implementation of the SRE SRFI supports infinite upper bounds in the +; **/**? forms, return the corresponding SRE value; otherwise, uncomment the 'fail' +; variants. Here we assume that such a bound is denoted by #f, as in IrRegex and +; Alex Shinn's reference implementation +(define (infub) #f) ; e.g. #f, fx-greatest, +inf.0, ... +(define (infub? x) (eqv? x #f)) +; (define (infub) (fail "no support for infinite upper bounds in **/**?")) +; (define (infub? x) #f) + ; SRE consructors (define (e-e) '(:)) +(define (e-null) '(or)) (define (e-bos o) (if (o-set? 'm o) 'bol 'bos)) (define (e-eos o) (if (o-set? 'm o) 'eol 'eos)) (define (e-dot o) (if (o-set? 's o) 'any 'nonl)) @@ -105,19 +128,22 @@ (define (with-e wl e) (if (null? wl) e (list (car wl) (with-e (cdr wl) e)))) (define (or-e e1 e2) - (if (and (pair? 
e1) (eqv? (car e1) 'or)) - (append e1 (list e2)) - (list 'or e1 e2))) + (cond ((equal? e2 '(or)) e1) + ((and (pair? e1) (eqv? (car e1) 'or)) + (append e1 (list e2))) + (else (list 'or e1 e2)))) (define (and-e e1 e2) - (cond ((and (pair? e1) (eqv? (car e1) 'and)) + (cond ((or (equal? e1 '(or)) (equal? e2 '(or))) '(or)) + ((and (pair? e1) (eqv? (car e1) 'and)) (append e1 (list e2))) ((and (pair? e2) (eqv? (car e2) '~)) (list '- e1 (cadr e2))) (else (list 'and e1 e2)))) (define (diff-e e1 e2) - (if (and (pair? e1) (eqv? (car e1) 'diff)) - (append e1 (list e2)) - (list '- e1 e2))) + (cond ((or (equal? e1 '(or)) (equal? e2 '(or))) e1) + ((and (pair? e1) (eqv? (car e1) 'diff)) + (append e1 (list e2))) + (else (list '- e1 e2)))) (define (range-e e1 e2) (list 'char-range e1 e2)) (define (inv-e e) @@ -145,21 +171,14 @@ (cond ((not n) (list '>= m e)) ((eqv? m n) (list '= m e)) (else (list '** m n e)))) -; NB: here we rely on a 'hidden feature' of the SRE specification: the second counter -; of the ** repeat can be #f (standing in for infinity); this extension is supported -; by Alex Shinn's reference implementation for both ** and **?, which makes it unnecessary -; to have nongreedy version of >= and/or duplicate repeated expression as a workaround -; If your SRE implementation does not support it, you may use (: e (*? e)) for +? and -; (: (**? m m e) (*? e)) for +=? if not for the fact that duplicated groups will not -; be counted properly (define (opt-e e) (if (pair? e) (case (car e) ((?) `(?? ,(cadr e))) ((*) `(*? ,(cadr e))) - ((+) `(**? 1 #f ,(cadr e))) ; see note above + ((+) `(**? 1 ,(infub) ,(cadr e))) ; see note above ((=) `(**? ,(cadr e) ,(cadr e) ,(caddr e))) - ((>=) `(**? ,(cadr e) #f ,(caddr e))) ; see note above + ((>=) `(**? ,(cadr e) ,(infub) ,(caddr e))) ; see note above ((**) `(**? ,@(cdr e))) (else `(? ,e))) (list '? e))) @@ -167,7 +186,7 @@ (list e e1)) (define (group-e e) (list '$ e)) (define (ungroup-e e) (if (and (= (length e) 2) (eq? 
(car e) '$)) (cadr e) e)) -(define (namegroup-e name e) (list '-> name e)) +(define (namegroup-e name e) (list arrsym name e)) (define (backref-e n) (list 'backref n)) (define (lookahead-e e) (list 'look-ahead e)) (define (lookbehind-e e) (list 'look-behind e)) @@ -187,7 +206,7 @@ ; PCRE-like notation parser -(define (parse-re-spec src o) ;=> e, s +(define (parse-ssre-spec src o) ;=> e, s (define (parse-body s o) ;=> e, s (let ((s0 (prefix? "(?" s))) (if (and s0 (pair? s0) (or (char-alphabetic? (car s0)) (eqv? (car s0) #\-))) @@ -389,6 +408,13 @@ (define (check-bcnd t e s op) (unless (eq? t 'bcnd) (fail s (string-append op " applied no non-bcnd argument") e s))) + (define (parse-body s o) ;=> t, e, s + (cond ((prefix? "?" s) + (receive (s o1) (parse-re-options (cdr s) o) + (unless (prefix? ":" s) (fail s "missing : after option flags")) + (receive (t e s) (parse-body (o-skip (cdr s) o1) o1) + (values t (with-e (o-wrappers o o1) e) s)))) + (else (parse-or s o)))) (define (parse-or s o) ;=> t, e, s (receive (t e s) (parse-in s o) (let loop ((t t) (e e) (s (o-skip s o))) @@ -424,10 +450,13 @@ (check-bcnd t e (cdr s) "!") (values 'bcnd (not-e e) s1))) ((and (pair? s) (eqv? (car s) obrc)) - (receive (t e s) (parse-re-set (cdr s) o) - (when (or (null? s) (not (eqv? (car s) cbrc))) - (fail s "missing }")) - (values t e (cdr s)))) + (let ((s (o-skip (cdr s) o))) + (if (and (pair? s) (eqv? (car s) cbrc)) + (values 'cset (e-null) (cdr s)) ; {} => (or) + (receive (t e s) (parse-body s o) + (when (or (null? s) (not (eqv? (car s) cbrc))) + (fail s "missing }")) + (values t e (cdr s)))))) ((and (pair? s) (eqv? (car s) obrk)) (receive (cs s) (parse-re-class (cdr s) o) (when (or (null? s) (not (eqv? (car s) cbrk))) @@ -437,25 +466,480 @@ (define (parse-prim s0 o) ;=> t, e, s (define (name-char? c) (or (char-alphabetic? c) (eqv? c #\_) (eqv? c #\<) (eqv? c #\>))) - (if (or (eqv? (car s0) #\^) (eqv? 
(car s0) #\/)) - (let ((name (string->symbol (string (car s0))))) - (receive (t e) (ref-named-expr name o s0) - (values t e (cdr s0)))) - (let loop ((s s0) (l '())) - (cond ((and (pair? s) (name-char? (car s))) - (loop (cdr s) (cons (car s) l))) - ((pair? l) - (let ((name (string->symbol (list->string (reverse l))))) - (receive (t e) (ref-named-expr name o s0) - (values t e s)))) - (else (fail s0 "name expected")))))) - (receive (t e s) (parse-or (o-skip src o) o) - (values t e (o-skip s o)))) + (let loop ((s s0) (l '())) + (cond ((and (pair? s) (name-char? (car s))) + (loop (cdr s) (cons (car s) l))) + ((pair? l) + (let ((name (string->symbol (list->string (reverse l))))) + (receive (t e) (ref-named-expr name o s0) + (values t e s)))) + (else (fail s0 "name expected"))))) + (let ((s (o-skip src o))) + (if (and (pair? s) (eqv? (car s) cbrc)) + (values 'cset (e-null) s) ; {} => (or) + (receive (t e s) (parse-body s o) + (values t e (o-skip s o)))))) + +; PCRE-like notation unparser (tries to use extended syntax only if necessary) + +(define (unfail msg . args) + (apply error 'sre->ssre msg args)) + +#| sre->core-sre converts sre to a simplified 'core' grammar that looks like this: + ::= + | (cset ) ; *extension, marks cset-typed subtrees + | (bcnd ) ; *extension, marks bcnd-typed subtrees + | (named "tn") ; symbol names a definition of expr kind, "tn" can be #f + | (shortcut "tn") ; ::= #\X, and top name "tn" can be #f + | (** ) ; to matches; can be #f (meaning infinity) + | (**? ) ; to non-greedy matches; can be #f (meaning infinity) + | (or ...) ; zero or at least 2 alternatives + | (: ...) 
; zero or at least 2 concatenees + | ($ ) ; numbered submatch + | (-> ) ; named submatch + | (w/case ) ; single-arg case and unicode toggling + | (w/nocase ) + | (w/ascii ) + | (w/unicode ) + | (look-ahead ) ; zero-width look-ahead assertion + | (look-behind ) ; zero-width look-behind assertion + | (neg-look-ahead ) ; zero-width negative look-ahead assertion + | (neg-look-behind ) ; zero-width negative look-behind assertion + | (backref ) ; match a previous submatch + + ::= + | + | (/ ) ; elementary char range + | (named "tn") ; symbol names a definition of csre kind, "tn" can be #f + | (shortcut "tn") ; ::= #\s | #\d | #\w | #\S | #\D | #\W, and "tn" can be #f + | (or ...) ; union of zero or at least two + | (& ...) ; intersection of zero or at least two + | (- ...) ; difference of at least two + | (~ ) ; complement of exactly one + | (w/case ) ; single-arg case and unicode toggling + | (w/nocase ) + | (w/ascii ) + | (w/unicode ) + + ::= + | (named "tn") ; symbol names a definition of bcnd kind, "tn" can be #f + | (shortcut "tn") ; ::= #\< | #\> | #\b | #\A | #\z, and "tn" can be #f + | (or ...) ; logical OR of zero or at least two + | (neg-look-ahead ) ; logical NOT of exactly one +|# + +(define (sre->core-sre sre o) ;=> cre + (define (range-spec? rs) (or (char? rs) (string? rs))) + (define (headed-list? x . hl) (and (pair? x) (memq (car x) hl) (list? (cdr x)))) + (define (list1? x) (and (pair? x) (null? (cdr x)))) + (define (list2? x) (and (pair? x) (list1? (cdr x)))) + (define (arg-or-seq l) (if (list1? l) (car l) (cons ': l))) + (define (arg-or-union l) (if (list1? l) (car l) (cons 'or l))) + (define (count? x) (and (number? x) (exact? x) (not (negative? x)))) + (define (alnum? x) (memq x '(alphanumeric alphanum alnum))) + (define (underscore? x) (member x '(#\_ ("_")))) + (define (wordcs? r) + (and (headed-list? r orsym 'or) (list2? (cdr r)) + (or (and (alnum? (cadr r)) (underscore? (caddr r))) (and (underscore? (cadr r)) (alnum? 
(caddr r)))))) + (define (wordbnd? r) + (and (headed-list? r orsym 'or) (member (cdr r) '((bow eow) (eow bow))))) + ; shortcuts do not depend on the current definitions parameter + (define (shortcut-expr-sre? r) ;=> char | #f + (cond ((memq r '(grapheme)) #\X) + ((equal? r '(: (? #\newline) eos)) #\Z) ; mostly roundtripping hack + (else #f))) + (define (shortcut-cset-sre? r) ;=> char | #f + (cond ((memq r '(numeric num)) #\d) + ((and (headed-list? r '~ 'complement) (list1? (cdr r)) (memq (cadr r) '(numeric num))) #\D) + ((memq r '(whitespace white space)) #\s) + ((and (headed-list? r '~ 'complement) (list1? (cdr r)) (memq (cadr r) '(whitespace white space))) #\S) + ((wordcs? r) #\w) + ((and (headed-list? r '~ 'complement) (list1? (cdr r)) (wordcs? (cadr r))) #\W) + (else #f))) + (define (shortcut-bcnd-sre? r) ;=> char | #f + (cond ((eq? r 'bos) #\A) ; better use ^ outside of charsets + ((eq? r 'eos) #\z) ; better use $ outside of charsets + ((eq? r 'bow) #\<) + ((eq? r 'eow) #\>) + ((wordbnd? r) #\b) + ((eq? r 'nwb) #\B) + ((and (headed-list? r 'neg-look-ahead) (list1? (cdr r)) (wordbnd? (cadr r))) #\B) + (else #f))) + ; try to map r to our 'canonical' name for reverse definition lookup + (define (named-class-sre? r) ;=> sym | #f + (cond ((assq r '((lower-case . lower) (lower . lower) (upper-case . upper) (upper . upper) + (title-case . title) (title . title) (alphabetic . alpha) (alpha . alpha) + (whitespace . space) (white . space) (space . space) (numeric . numeric) (num . numeric) + (alphanumeric . alnum) (alphanum . alnum) (alnum . alnum) (symbol . symbol) + (control . cntrl) (cntrl . cntrl) (printing . print) (print . print) + (graphic . graph) (graph . graph) (punctuation . punct) (punct . punct) + (hex-digit . xdigit) (xdigit . xdigit) (ascii . ascii))) => cdr) + (else #f))) + (define (lookup r o) + (let ((rv (or (and (symbol? r) (named-class-sre? r)) r))) + (o-reverse-lookup rv o))) + (define (lookup-name r o t) + (let ((x (lookup r o))) (and x (eq?
(cadr x) t) (car x)))) + (define (flatten-char-ranges l) ;=> (start-char end-char ...) + (let flatten ((l l) (fl '())) + (cond ((null? l) (reverse fl)) + ((string? (car l)) (flatten (append (string->list (car l)) (cdr l)) fl)) + (else (flatten (cdr l) (cons (car l) fl)))))) + (define (lct t1 t2) ; least common type + (cond ((not t1) t2) ((not t2) t1) ((eq? t1 t2) t1) (else 'expr))) + (define (cast cr rt ct) ;=> cr' + (if (eq? rt ct) cr `(,rt ,cr))) + (define (cast-noncasted cr ct) ;=> (rt cr') + (if (headed-list? cr 'bcnd 'cset) cr `(,ct ,cr))) + ; do not sort, just cluster/merge same-type neighbors + (define (or-join r1 r2 ti) + (cond ;these tests shorten the output, but lead to quadratic behavior + ;((and (eq? ti 'cset) (equal? r1 r2) r1)) ; safe: no groups inside + ;((and (headed-list? r1 'or) (member r2 (cdr r1))) r1) + (else (or-e r1 r2)))) + (define (finalize-or rl tl) + (let loop ((rl rl) (tl tl) (ct #f) (crl '()) (ctl '())) + (if (null? rl) + (let ((l (reverse (map (lambda (r t) (cast r t ct)) crl ctl)))) + (cond ((null? l) (values '(or) 'cset)) + ((null? (cdr l)) (values (car l) ct)) + (else (values `(or . ,l) ct)))) + (let ((ri (car rl)) (ti (car tl))) + (if (and (pair? (cdr tl)) (eq? ti (cadr tl))) + (loop (cons (or-join ri (cadr rl) ti) (cddr rl)) (cons ti (cddr tl)) ct crl ctl) + (loop (cdr rl) (cdr tl) (lct ct ti) (cons ri crl) (cons ti ctl))))))) + (define (convert r o) + (let cvt ((r r)) + (cond + ((eq? r 'bos) (values `(shortcut #\A ,(lookup-name r o 'bcnd) "^") 'bcnd)) + ((eq? r 'eos) (values `(shortcut #\z ,(lookup-name r o 'bcnd) "$") 'bcnd)) + ((eq? r 'nonl) (values `(named ,(lookup-name r o 'cset) ".") 'cset)) + ((shortcut-expr-sre? r) => + (lambda (c) (values `(shortcut ,c ,(lookup-name r o 'expr) #f) 'expr))) + ((shortcut-cset-sre? r) => + (lambda (c) (values `(shortcut ,c ,(lookup-name r o 'cset) #f) 'cset))) + ((shortcut-bcnd-sre? 
r) => + (lambda (c) (values `(shortcut ,c ,(lookup-name r o 'bcnd) #f) 'bcnd))) + ((lookup r o) => + (lambda (x) (values `(named ,(car x) #f) (cadr x)))) + ((string? r) + (cvt `(: . ,(string->list r)))) + ((headed-list? r '* 'zero-or-more) + (cvt `(** 0 #f . ,(cdr r)))) + ((headed-list? r '+ 'one-or-more) + (cvt `(** 1 #f . ,(cdr r)))) + ((headed-list? r '? 'optional) + (cvt `(** 0 1 . ,(cdr r)))) + ((and (headed-list? r '= 'exactly) (>= (length r) 2) (count? (cadr r))) + (cvt `(** ,(cadr r) ,(cadr r) . ,(cddr r)))) + ((and (headed-list? r '>= 'at-least) (>= (length r) 2) (count? (cadr r))) + (cvt `(** ,(cadr r) #f . ,(cddr r)))) + ((and (headed-list? r '** 'repeated) + (>= (length r) 3) (count? (cadr r)) (or (count? (caddr r)) (infub? (caddr r)))) + (if (and (eqv? (cadr r) 1) (eqv? (caddr r) 1)) + (cvt (arg-or-seq (cdddr r))) + (receive (cr ct) (cvt (arg-or-seq (cdddr r))) + (define ub (if (infub? (caddr r)) #f (caddr r))) + (values `(** ,(cadr r) ,ub ,(cast cr ct 'expr)) 'expr)))) + ((headed-list? r orsym 'or) + (let loop ((l (cdr r)) (rl '()) (tl '())) + (if (null? l) (finalize-or (reverse rl) (reverse tl)) + (receive (cr ct) (cvt (car l)) + (if (headed-list? cr 'or) ; splice in nested ors using ct for noncasted + (let* ((srl (reverse (cdr cr))) ; mix of casted and noncasted (ct) + (crl (map (lambda (r) (cast-noncasted r ct)) srl))) ; all casted + ; separate cast types and cres for the rest of the algorithm + (loop (cdr l) (append (map cadr crl) rl) (append (map car crl) tl))) + (loop (cdr l) (cons cr rl) (cons ct tl))))))) + ((headed-list? r ': 'seq) + (let loop ((l (cdr r)) (rl '())) + (if (null? l) + (let ((l (reverse rl))) + (cond ((null? l) (values '(:) 'expr)) ; epsilon + ((null? (cdr l)) (values (car l) 'expr)) + (else (values `(: . ,l) 'expr)))) + (receive (cr ct) (cvt (car l)) + (if (headed-list? cr ':) ; splice in nested :s + (loop (cdr l) (append (reverse (cdr cr)) rl)) + (loop (cdr l) (cons (cast cr ct 'expr) rl))))))) + ((headed-list? 
r '$ 'submatch) + (receive (cr ct) (cvt (arg-or-seq (cdr r))) + (if (o-set? 'n o) (values cr ct) (values `($ ,(cast cr ct 'expr)) 'expr)))) + ((and (headed-list? r arrsym 'submatch-named) (>= (length r) 2) (symbol? (cadr r))) + (receive (cr ct) (cvt (arg-or-seq (cddr r))) + (if (o-set? 'n o) (values cr ct) (values `(,arrsym ,(cadr r) ,(cast cr ct 'expr)) 'expr)))) + ((headed-list? r 'w/case 'w/nocase 'w/unicode 'w/ascii) + ; since we don't allow multiargument w/xxx in context, we just + ; wrap multiple args in a seq, leaving a single arg as-is; type errors won't + ; allow the sequenced ones to be used in the context + (receive (cr ct) (cvt (arg-or-seq (cdr r))) + (values `(,(car r) ,cr) ct))) ; do not cast cr to 'expr: has to work in all contexts + ((headed-list? r 'w/nocapture) + (convert (arg-or-seq (cdr r)) (o-add 'n o))) ; use 'n flag to kill BOTH numbered and named captures! + ((memq r '(bos eos bol eol bog eog bow eow nwb)) + (values r 'bcnd)) + ((memq r '(grapheme word)) + (values r 'expr)) + ((headed-list? r 'word) + (cvt `(: bow ,@(cdr r) eow))) + ((headed-list? r 'word+) + (if (equal? (cdr r) '(any)) + (values 'word 'expr) + (cvt `(word (+ (and (or alnum #\_) (or ,@(cdr r)))))))) + ((headed-list? r '*? 'non-greedy-zero-or-more) + (cvt `(**? 0 #f . ,(cdr r)))) + ((headed-list? r '?? 'non-greedy-optional) + (cvt `(**? 0 1 . ,(cdr r)))) + ((and (headed-list? r '**? 'non-greedy-repeated) + (>= (length r) 3) (count? (cadr r)) (or (count? (caddr r)) (infub? (caddr r)))) + (if (and (eqv? (cadr r) 1) (eqv? (caddr r) 1)) + (cvt (arg-or-seq (cdddr r))) + (receive (cr ct) (cvt (arg-or-seq (cdddr r))) + (define ub (if (infub? (caddr r)) #f (caddr r))) + (values `(**? ,(cadr r) ,ub ,(cast cr ct 'expr)) 'expr)))) + ((headed-list? 
r 'look-ahead 'look-behind 'neg-look-ahead 'neg-look-behind) + (receive (cr ct) (cvt (arg-or-seq (cdr r))) + ; in truth, all lookarounds are boundary conditions, but we only want to keep a small + ; subset of them as conditions for the purpose of rendering them via the {..} notation + (define rct (if (and (eq? (car r) 'neg-look-ahead) (eq? ct 'bcnd)) 'bcnd 'expr)) + (values `(,(car r) ,(cast cr ct rct)) rct))) + ((and (headed-list? r 'backref) (= (length r) 2) (or (symbol? (cadr r)) (count? (cadr r)))) + (values r 'expr)) + ; csets, fall through + ((char? r) + (values r 'cset)) + ((and (list1? r) (string? (car r))) + (cvt (cons 'or (string->list (car r))))) + ((and (headed-list? r 'char-set) (list1? (cdr r)) (string? (cadr r))) + (cvt (cons 'or (string->list (cadr r))))) + ((and (headed-list? r '/ 'char-range) (andmap range-spec? (cdr r))) + (let loop ((cr* (flatten-char-ranges (cdr r))) (rl '())) + (cond ((and (null? cr*) (list1? rl)) (values (car rl) 'cset)) + ((null? cr*) (values `(or . ,(reverse rl)) 'cset)) + ((null? (cdr cr*)) (unfail "odd char count in char range SRE" r)) + ((eqv? (car cr*) (cadr cr*)) (loop (cddr cr*) (cons (car cr*) rl))) + ((char<=? (car cr*) (cadr cr*)) (loop (cddr cr*) (cons `(/ ,(car cr*) ,(cadr cr*)) rl))) + (else (unfail "invalid char range in SRE" r (car cr*) (cadr cr*)))))) + ((headed-list? r '& 'and) + (cond ((null? (cdr r)) (values '(&) 'cset)) ; neutral element for and, same as 'any' + ((null? (cddr r)) (cvt (cadr r))) ; idty: do not upgrade type? + (else (let loop ((l (cdr r)) (rl '())) + (if (null? l) (values `(& . ,(reverse rl)) 'cset) + (receive (cr ct) (cvt (car l)) + (unless (eq? ct 'cset) (unfail "non-cset argument inside (& ...)" (car l))) + (if (headed-list? cr '&) ; splice in nested ands + (loop (cdr l) (append (reverse (cdr cr)) rl)) + (loop (cdr l) (cons cr rl))))))))) + ((and (headed-list? r '- 'difference) (pair? (cdr r))) + (cond ((null? (cddr r)) (cvt (cadr r))) ; idty: do not upgrade type? 
+ (else (receive (cr0 ct0) (cvt (cadr r)) + (unless (eq? ct0 'cset) (unfail "non-cset argument inside (- ...)" (cadr r))) + (let loop ((l (cddr r)) (rl (list cr0))) + (if (null? l) (values `(- . ,(reverse rl)) 'cset) + (receive (cr ct) (cvt (car l)) + (unless (eq? ct 'cset) (unfail "non-cset argument inside (- ...)" (car l))) + (if (headed-list? cr 'or) ; splice in nested ors + (loop (cdr l) (append (reverse (cdr cr)) rl)) + (loop (cdr l) (cons cr rl)))))))))) + ((headed-list? r '~ 'complement) + (receive (cr ct) (cvt (arg-or-union (cdr r))) + (unless (eq? ct 'cset) (unfail "non-cset argument inside (~ ...)" r)) + (values `(~ ,cr) 'cset))) + (else (unfail "invalid or unsupported SRE" r))))) + ; start the conversion + (receive (cr ct) (convert sre o) + (cast cr ct 'expr))) + +; internal +(define (sre->csre sre) ;=> csre + (define ds (ssre-definitions)) + (sre->core-sre sre (cons 'u (ds-nes ds)))) + +; render csre grammar to a text port p +(define (unparse-csre-spec csre p) + (define (emit . xl) (for-each (lambda (x) (display x p)) xl)) + (define (emit-shortcut x) (if (char? x) (emit #\\ x) (emit x))) + (define (headed-list? x . hl) (and (pair? x) (memq (car x) hl) (list? (cdr x)))) + (define (options-prefix x) + (case x ((w/case) "-i") ((w/nocase) "i") ((w/unicode) "u") ((w/ascii) "-u"))) + (define (lookaround-prefix x) + (case x ((look-ahead) "=") ((neg-look-ahead) "!") ((look-behind) "<=") ((neg-look-behind) " (length (cdr r)) 1) (andmap cset-class-elt? (cdr r))) + (and (headed-list? r '~) (headed-list? (cadr r) 'or) + (pair? (cdadr r)) (andmap cset-class-elt? (cdadr r))))) + ; entry point + (define (unparse-top r) + ; recognize popular nondefault option prefixes, to save on : + (cond ((headed-list? r 'w/nocase 'w/case 'w/ascii 'w/unicode) + (emit "(?" (options-prefix (car r)) ")") (unparse-top (cadr r))) + ((and (headed-list? r 'cset) ; pull w/xxx out of cset wrapper + (headed-list?
(cadr r) 'w/nocase 'w/case 'w/ascii 'w/unicode)) + (unparse-top `(,(car (cadr r)) (cset ,(cadr (cadr r)))))) + (else (unparse-body r)))) + (define (unparse-body r) + (unparse-alt r)) + (define (unparse-alt r) + (cond ((equal? r '(or)) ; special case + (emit "{}")) + ((headed-list? r 'or) + (let loop ((l (cdr r))) + (unless (null? l) + (unparse-alt (car l)) + (unless (null? (cdr l)) (emit #\|)) + (loop (cdr l))))) + (else (unparse-seq r)))) + (define (unparse-seq r) + (cond ((headed-list? r ':) + (let loop ((l (cdr r))) + (unless (null? l) + (unparse-seq (car l)) + (loop (cdr l))))) + (else (unparse-quant r)))) + (define (unparse-quant r) + (cond ((headed-list? r '** '**?) + ; make sure nested repeats, if they happen, are rendered as separate + ; so that no unexpected non-greedy combos are produced by the parser + (cond ((not (headed-list? (cadddr r) '** '**?)) (unparse-quant (cadddr r))) + (else (emit "(?:") (unparse-quant (cadddr r)) (emit ")"))) + (cond ((and (eqv? (cadr r) 0) (eqv? (caddr r) 1)) (emit #\?)) + ((and (eqv? (cadr r) 0) (not (caddr r))) (emit #\*)) + ((and (eqv? (cadr r) 1) (not (caddr r))) (emit #\+)) + ((eqv? (cadr r) (caddr r)) (emit #\{ (cadr r) #\})) + (else (emit #\{ (cadr r) #\, (or (caddr r) "") #\}))) + (when (eq? (car r) '**?) (emit #\?))) + (else (unparse-prim r)))) + (define (unparse-prim r) + (cond ((headed-list? r 'cset) (unparse-cset (cadr r))) + ((headed-list? r 'bcnd) (unparse-bcnd (cadr r))) + ((headed-list? r 'shortcut) + (cond ((cadddr r) => emit) ; use "top name" if any + (else (emit-shortcut (cadr r))))) + ((headed-list? r 'named) + (cond ((caddr r) => emit) ; use "top name" if any + (else (emit "\\p{" (cadr r) "}")))) + ((headed-list? r '$) + (emit "(") (unparse-body (cadr r)) (emit ")")) + ((headed-list? r arrsym) + (emit "(?<" (cadr r) ">") (unparse-body (caddr r)) (emit ")")) + ((headed-list? r 'w/case 'w/nocase 'w/unicode 'w/ascii) + (emit "(?"
(options-prefix (car r)) ":") (unparse-body (cadr r)) (emit ")")) + ((headed-list? r 'look-ahead 'neg-look-ahead 'look-behind 'neg-look-behind) + (emit "(?" (lookaround-prefix (car r))) (unparse-body (cadr r)) (emit ")")) + ((headed-list? r 'backref) + (cond ((symbol? (cadr r)) (emit "\\k<" (cadr r) ">")) + ((< (cadr r) 100) (emit "\\" (cadr r))) + (else (unfail "numerical backref out of range" (cadr r))))) + ((headed-list? r ': 'or) + (emit "(?:") (unparse-body r) (emit ")")) + (else (unfail "unsupported SRE" r)))) + (define (unparse-cset r) + (cond ((char? r) + (case r ((#\\ #\^ #\$ #\. #\| #\* #\+ #\? #\[ #\] #\( #\) #\{ #\}) (emit #\\))) + (emit r)) + ((and (headed-list? r 'named) (caddr r)) => emit) ; use "top name" if any + ((headed-list? r 'shortcut) + (cond ((cadddr r) => emit) ; use "top name" if any + (else (emit-shortcut (cadr r))))) + ((headed-list? r 'w/case 'w/nocase 'w/ascii 'w/unicode) + (emit "(?" (options-prefix (car r)) #\:) (unparse-cset (cadr r)) (emit ")")) + ((cset-class? r) (unparse-cset-class r)) + (else (emit "{") (unparse-cset-body r) (emit "}")))) + (define (unparse-cset-body r) + (cond (else (unparse-cset-alt r)))) + (define (unparse-cset-alt r) + (cond ((headed-list? r 'or) + (let loop ((l (cdr r))) + (unless (null? l) + (unparse-cset-alt (car l)) + (unless (null? (cdr l)) (emit #\|)) + (loop (cdr l))))) + (else (unparse-cset-infix r)))) + (define (unparse-cset-infix r) + (cond ((headed-list? r '- '&) + (let loop ((l (cdr r))) + (cond ((null? (cdr l)) (unparse-cset-prefix (car l))) + (else (unparse-cset-prefix (car l)) + (if (headed-list? r '&) (emit #\&) (emit #\-)) + (loop (cdr l)))))) + (else (unparse-cset-prefix r)))) + (define (unparse-cset-prefix r) + (cond ((headed-list? r '~) + (emit "~") (unparse-cset-prefix (cadr r))) + (else (unparse-cset-prim r)))) + (define (unparse-cset-prim r) + (cond ((headed-list? r 'named) (emit (cadr r))) + ((headed-list? 
r 'shortcut) + (cond ((caddr r) => emit) ; use name if any + (else (emit-shortcut (cadr r))))) + ((cset-class? r) (unparse-cset-class r)) + ((headed-list? r 'w/case 'w/nocase 'w/ascii 'w/unicode) + (emit "{?" (options-prefix (car r)) #\:) (unparse-cset-body (cadr r)) (emit "}")) + ((headed-list? r 'or) + (emit "{") (unparse-cset-body r) (emit "}")) + (else (unfail "invalid SRE char set" r)))) + (define (unparse-cset-class r) + (cond ((headed-list? r '~) (emit "[^") (unparse-class-body (cadr r)) (emit "]")) + (else (emit "[") (unparse-class-body r) (emit "]")))) + (define (unparse-class-body r) + (cond ((headed-list? r 'or) (for-each unparse-class-elt (cdr r))) + (else (unparse-class-elt r)))) + (define (unparse-class-elt r) + (cond ((char? r) + (case r ((#\\ #\^ #\- #\[ #\]) (emit #\\))) (emit r)) + ((headed-list? r '/) + (let ((r (cadr r))) (case r ((#\\ #\^ #\- #\[ #\]) (emit #\\))) (emit r)) + (emit #\-) + (let ((r (caddr r))) (case r ((#\\ #\^ #\- #\[ #\]) (emit #\\))) (emit r))) + ((headed-list? r 'named) (emit "[:" (cadr r) ":]")) + ((headed-list? r 'shortcut) (emit-shortcut (cadr r))) + (else (unfail "invalid SRE class element" r)))) + (define (unparse-bcnd r) + (cond ((headed-list? r 'shortcut) + (cond ((cadddr r) => emit) ; use "top name" if any + (else (emit-shortcut (cadr r))))) + (else (emit "{") (unparse-bcnd-body r) (emit "}")))) + (define (unparse-bcnd-body r) + (unparse-bcnd-alt r)) + (define (unparse-bcnd-alt r) + (cond ((headed-list? r 'or) + (let loop ((l (cdr r))) + (unless (null? l) + (unparse-bcnd-alt (car l)) + (unless (null? (cdr l)) (emit #\|)) + (loop (cdr l))))) + (else (unparse-bcnd-prefix r)))) + (define (unparse-bcnd-prefix r) + (cond ((and (headed-list? r 'neg-look-ahead)) + (emit "!") (unparse-bcnd-prefix (cadr r))) + (else (unparse-bcnd-prim r)))) + (define (unparse-bcnd-prim r) + (cond ((char? r) (emit r)) + ((headed-list? r 'named) (emit (cadr r))) + ((headed-list? 
r 'shortcut) + (cond ((caddr r) => emit) ; use name if any + (else (emit-shortcut (cadr r))))) + ((headed-list? r 'or) + (emit "{") (unparse-bcnd-body r) (emit "}")) + (else (unfail "invalid SRE boundary condition" r)))) + ; start here + (unparse-top csre)) + +; internal +(define (csre->ssre cs) + (let ((p (open-output-string))) + (unparse-csre-spec cs p) + (get-output-string p))) + +; (unparse-re-spec sre p o) (define named-exprs '( - (/ cset #\\) - (^ cset #\^) + (ascii cset ascii) + (nonl cset nonl) (any cset any) (_ cset any) (digit cset numeric) (n cset numeric) (d cset numeric) (lower cset lower) (l cset lower) @@ -468,10 +952,10 @@ (graph cset graph) (g cset graph) (symbol cset symbol) (y cset symbol) (print cset print) (gs cset print) - (blank cset (or #\space #\tab)) (h cset (or #\space #\tab)) + (blank cset (or #\space #\tab)) (h cset (or #\space #\tab)) ; ascii version (space cset space) (s cset space) (w cset (or alnum #\_)) - (v cset (- space (or #\space #\tab))) + (v cset (- space (or #\space #\tab))) ; ascii version (bos bcnd bos) ( bcnd eos) (bol bcnd bol) (string name)))))) ; definitions are wrapped into a ds structure with 2 extra slots to contain cached data; -; cache #1 is for string-sre->sre, cache #2 for string-sre->regexp +; cache #1 is for ssre->sre, cache #2 for ssre->regexp (define (make-ds nes) (vector nes '() '())) (define (ds-nes ds) (vector-ref ds 0)) @@ -514,47 +998,54 @@ ; a parameter procedure can be called with a value argument to set the parameter globally. ; This behavior is not required by R7RS. 
-(define string-sre-definitions
-  (make-parameter (make-ds named-exprs)))
+; Parameter object holding the current definitions, wrapped into a ds by make-ds.
+(define ssre-definitions
+  ; named-exprs is reversed here to make sure o-reverse-lookup picks shorter names
+  (make-parameter (make-ds (reverse named-exprs))))
 
-(define (string-sre-bind n t e ds)
-  (make-ds (cons (list n t e) (ds-nes ds))))
+; Returns a new ds in which the entry (n t e) is put in front of the entries of ds;
+; older entries for n are dropped first, so n ends up with exactly one entry.
+(define (ssre-bind n t e ds)
+  (make-ds (cons (list n t e) (ds-nes (ssre-unbind n ds)))))
 
-(define (string-sre-unbind n ds)
+; Returns a new ds holding the entries of ds minus every entry whose name is eq? to n.
+(define (ssre-unbind n ds)
   (define (unbind n nes)
     (cond ((null? nes) nes)
           ((and (pair? nes) (pair? (car nes)) (eq? (caar nes) n)) (unbind n (cdr nes)))
-          (else (cons (car nes) (string-sre-unbind n (cdr nes))))))
+          (else (cons (car nes) (unbind n (cdr nes))))))
   (make-ds (unbind n (ds-nes ds))))
 
+; Signals an error (via error) whose message is "ssre->sre: " followed by msg; args
+; become the irritants. When src is not longer than str, the message is extended
+; with str and a caret line; the caret column is (length of str) - (length of src),
+; so src is presumably the part of str that was still unread -- confirm against fail.
 (define (ssre-fancy-error str src msg args)
-  (define p (- (string-length str) (length src)))
-  (define m (string-append "string-sre->sre: " msg))
+  ; ssre-syntax-error? only admits conditions whose src slot is a string, and length
+  ; is an error on strings, so strings are measured with string-length; a list is
+  ; still measured with length
+  (define p (- (string-length str) (if (string? src) (string-length src) (length src))))
+  (define m (string-append "ssre->sre: " msg))
   (when (>= p 0) ; todo: what if str is multi-line? pick p line only!
     (set! m (string-append m "\n" str "\n" (make-string p #\space) "^")))
   (apply error m args))
 
-(define (string-sre-syntax-error? x)
-  (and (list? x) (= (length x) 4) (eq? (car x) 'string-sre->sre)
+; True for objects of the shape (ssre->sre <string> <string> <list>); the guards in
+; ssre->sre and ssre->regexp pass the last three elements to ssre-fancy-error.
+(define (ssre-syntax-error? x)
+  (and (list? x) (= (length x) 4) (eq? (car x) 'ssre->sre)
        (string? (cadr x)) (string? (caddr x)) (list? (cadddr x))))
 
-(define (string-sre->sre str)
-  (define ds (string-sre-definitions))
-  (define cs (cache-slot ds 1 str)) ; cache #1 is for string-sre->sre
+; Parses the string str with parse-ssre-spec against the current (ssre-definitions).
+; cs is the pair returned by cache-slot: a non-#f cdr is returned as is, otherwise
+; the fresh result is stored there. Objects accepted by ssre-syntax-error? are
+; re-signalled through ssre-fancy-error.
+(define (ssre->sre str)
+  (define ds (ssre-definitions))
+  (define cs (cache-slot ds 1 str)) ; cache #1 is for ssre->sre
   (or (cdr cs)
-      (guard (x ((string-sre-syntax-error? x) (apply ssre-fancy-error str (cdr x))))
-        (receive (e s) (parse-re-spec (string->list str) (cons 'u (ds-nes ds)))
+      (guard (x ((ssre-syntax-error? x) (apply ssre-fancy-error str (cdr x))))
+        (receive (e s) (parse-ssre-spec (string->list str) (cons 'u (ds-nes ds)))
           (when (pair? s) (fail s (string-append "unexpected terminator char: " (string (car s)))))
           (set-cdr! cs e) e))))
 
-(define (string-sre->regexp str)
-  (define ds (string-sre-definitions))
-  (define cs (cache-slot ds 2 str)) ; cache #2 is for string-sre->regexp
+; Same as ssre->sre, except that the parse result is passed to regexp and it is
+; the value returned by regexp that is stored in the cache pair and returned.
+(define (ssre->regexp str)
+  (define ds (ssre-definitions))
+  (define cs (cache-slot ds 2 str)) ; cache #2 is for ssre->regexp
   (or (cdr cs)
-      (guard (x ((string-sre-syntax-error? x) (apply ssre-fancy-error str (cdr x))))
-        (receive (e s) (parse-re-spec (string->list str) (cons 'u (ds-nes ds)))
+      (guard (x ((ssre-syntax-error? x) (apply ssre-fancy-error str (cdr x))))
+        (receive (e s) (parse-ssre-spec (string->list str) (cons 'u (ds-nes ds)))
           (when (pair? s) (fail s (string-append "unexpected terminator char: " (string (car s)))))
           (let ((re (regexp e))) (set-cdr! cs re) re)))))
+
+; Returns sre rendered as a string: sre->csre converts it against the current
+; (ssre-definitions), csre->ssre prints the resulting csre to a string.
+(define (sre->ssre sre)
+  (csre->ssre (sre->csre sre)))