@@ -128,10 +128,7 @@ Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
128128 ; ; have been the "(?:"<regex>")" production
129129 (cons :group (nconc flags (list reg-expr)))
130130 (if (eq open-token :open-paren-less-letter )
131- (list :named-register
132- ; ; every string was reversed, so we have to
133- ; ; reverse it back to get the name
134- (nreverse register-name)
131+ (list :named-register register-name
135132 reg-expr)
136133 (list (case open-token
137134 ((:open-paren )
@@ -201,54 +198,42 @@ Will return <parse-tree> or (:SEQUENCE <parse-tree> <parse-tree>)."
201198 ; ; to parse a <seq> or <quant> in order to catch empty regular
202199 ; ; expressions
203200 (if (start-of-subexpr-p lexer)
204- (let ((quant (quant lexer)))
205- (if (start-of-subexpr-p lexer)
206- (let* ((seq (seq lexer))
207- (quant-is-char-p (characterp quant))
208- (seq-is-sequence-p (and (consp seq)
209- (eq (first seq) :sequence ))))
210- (cond ((and quant-is-char-p
211- (characterp seq))
212- (make-array-from-two-chars seq quant))
213- ((and quant-is-char-p
214- (stringp seq))
215- (vector-push-extend quant seq)
216- seq)
217- ((and quant-is-char-p
218- seq-is-sequence-p
219- (characterp (second seq)))
220- (cond ((cddr seq)
221- (setf (cdr seq)
222- (cons
223- (make-array-from-two-chars (second seq)
224- quant)
225- (cddr seq)))
226- seq)
227- (t (make-array-from-two-chars (second seq) quant))))
228- ((and quant-is-char-p
229- seq-is-sequence-p
230- (stringp (second seq)))
231- (cond ((cddr seq)
232- (setf (cdr seq)
233- (cons
234- (progn
235- (vector-push-extend quant (second seq))
236- (second seq))
237- (cddr seq)))
238- seq)
239- (t
240- (vector-push-extend quant (second seq))
241- (second seq))))
242- (seq-is-sequence-p
243- ; ; if <seq> is also a :SEQUENCE parse tree we merge
244- ; ; both lists into one to avoid unnecessary consing
245- (setf (cdr seq)
246- (cons quant (cdr seq)))
247- seq)
248- (t (list :sequence quant seq))))
249- quant))
250- :void )))
251-
201+ (loop with seq-is-sequence-p = nil
202+ with last-cdr
203+ for quant = (quant lexer)
204+ for quant-is-char-p = (characterp quant)
205+ for seq = quant
206+ then
207+ (cond ((and quant-is-char-p (characterp seq))
208+ (make-array-from-two-chars seq quant))
209+ ((and quant-is-char-p (stringp seq))
210+ (vector-push-extend quant seq)
211+ seq)
212+ ((not seq-is-sequence-p)
213+ (setf last-cdr (list quant)
214+ seq-is-sequence-p t )
215+ (list* :sequence seq last-cdr))
216+ ((and quant-is-char-p
217+ (characterp (car last-cdr)))
218+ (setf (car last-cdr)
219+ (make-array-from-two-chars (car last-cdr)
220+ quant))
221+ seq)
222+ ((and quant-is-char-p
223+ (stringp (car last-cdr)))
224+ (vector-push-extend quant (car last-cdr))
225+ seq)
226+ (t
227+ ; ; if <seq> is also a :SEQUENCE parse tree we merge
228+ ; ; both lists into one
229+ (let ((cons (list quant)))
230+ (psetf last-cdr cons
231+ (cdr last-cdr) cons ))
232+ seq))
233+ while (start-of-subexpr-p lexer)
234+ finally (return seq))
235+ :void )))
236+
252237(defun reg-expr (lexer)
253238 " Parses and consumes a <regex>, a complete regular expression.
254239The productions are: <regex> -> <seq> | <seq>\" |\" <regex>.
@@ -294,25 +279,11 @@ Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
294279 (setf (lexer-pos lexer) pos)
295280 seq)))))))
296281
297- (defun reverse-strings (parse-tree)
298- " Recursively walks through PARSE-TREE and destructively reverses all
299- strings in it."
300- (declare #. *standard-optimize-settings*)
301- (cond ((stringp parse-tree)
302- (nreverse parse-tree))
303- ((consp parse-tree)
304- (loop for parse-tree-rest on parse-tree
305- while parse-tree-rest
306- do (setf (car parse-tree-rest)
307- (reverse-strings (car parse-tree-rest))))
308- parse-tree)
309- (t parse-tree)))
310-
311282(defun parse-string (string )
312283 " Translate the regex string STRING into a parse tree."
313284 (declare #. *standard-optimize-settings*)
314285 (let* ((lexer (make-lexer string ))
315- (parse-tree (reverse-strings ( reg-expr lexer) )))
286+ (parse-tree (reg-expr lexer)))
316287 ; ; check whether we've consumed the whole regex string
317288 (if (end-of-string-p lexer)
318289 parse-tree
0 commit comments