Skip to content

Commit 205409c

Browse files
committed
Migrate OCaml Lexer
1 parent e3f2637 commit 205409c

File tree

1 file changed

+44
-37
lines changed

1 file changed

+44
-37
lines changed

lexers/caml.lua

Lines changed: 44 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,57 @@
11
-- Copyright 2006-2025 Mitchell. See LICENSE.
22
-- OCaml LPeg lexer.
3+
-- Migrated by Samuel Marquis.
34

4-
local lexer = require('lexer')
5-
local token, word_match = lexer.token, lexer.word_match
5+
local lexer = lexer
66
local P, S = lpeg.P, lpeg.S
77

8-
local lex = lexer.new('caml')
9-
10-
-- Whitespace.
11-
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
8+
local lex = lexer.new(..., {fold_by_indentation = true})
129

1310
-- Keywords.
14-
lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
15-
'and', 'as', 'asr', 'begin', 'class', 'closed', 'constraint', 'do', 'done', 'downto', 'else',
16-
'end', 'exception', 'external', 'failwith', 'false', 'flush', 'for', 'fun', 'function', 'functor',
17-
'if', 'in', 'include', 'incr', 'inherit', 'land', 'let', 'load', 'los', 'lsl', 'lsr', 'lxor',
18-
'match', 'method', 'mod', 'module', 'mutable', 'new', 'not', 'of', 'open', 'option', 'or',
19-
'parser', 'private', 'raise', 'rec', 'ref', 'regexp', 'sig', 'stderr', 'stdin', 'stdout',
20-
'struct', 'then', 'to', 'true', 'try', 'type', 'val', 'virtual', 'when', 'while', 'with'
21-
}))
11+
lex:add_rule('keyword', lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD)))
2212

2313
-- Types.
24-
lex:add_rule('type', token(lexer.TYPE, word_match('bool char float int string unit')))
14+
lex:add_rule('type', lex:tag(lexer.TYPE, lex:word_match(lexer.TYPE)))
2515

2616
-- Functions.
27-
lex:add_rule('function', token(lexer.FUNCTION, word_match{
17+
lex:add_rule('function', lex:tag(lexer.FUNCTION, lex:word_match(lexer.FUNCTION)))
18+
19+
-- Identifiers.
20+
-- An identifier starts with a letter or underscore and continues with letters, digits,
-- underscores, or primes. The first character must not be a digit or a quote: this rule is
-- added before the 'string' and 'number' rules, so a pattern that accepted those characters
-- up front would tag literals such as 123 or 'a' as identifiers.
local word = (lexer.alpha + P('_')) * (lexer.alnum + S("_'"))^0
21+
lex:add_rule('identifier', lex:tag(lexer.IDENTIFIER, word))
22+
23+
-- Strings.
24+
local sq_str = lexer.range("'", true)
25+
local dq_str = lexer.range('"')
26+
lex:add_rule('string', lex:tag(lexer.STRING, sq_str + dq_str))
27+
28+
-- Comments.
29+
-- Tags text from '(*' through '*)' as a comment.
-- NOTE(review): the positional flags are presumably single_line=false, escapes=false,
-- balanced=true (i.e. multi-line, nestable comments) -- confirm against lexer.range's docs.
lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.range('(*', '*)', false, false, true)))
30+
31+
-- Numbers.
32+
lex:add_rule('number', lex:tag(lexer.NUMBER, lexer.number))
33+
34+
-- Operators.
35+
lex:add_rule('operator', lex:tag(lexer.OPERATOR, S('=<>+-*/.,:;~!#%^&|?[](){}')))
36+
37+
lexer.property['scintillua.comment'] = '(*|*)'
38+
39+
lex:set_word_list(lexer.KEYWORD, {
40+
'and', 'as', 'asr', 'begin', 'class', 'closed', 'constraint', 'do', 'done',
41+
'downto', 'else', 'end', 'exception', 'external', 'failwith', 'false', 'flush',
42+
'for', 'fun', 'function', 'functor', 'if', 'in', 'include', 'incr', 'inherit',
43+
-- Includes the bitwise/shift operator keywords: land, lor, lsl, lsr, lxor.
'land', 'let', 'load', 'lor', 'lsl', 'lsr', 'lxor', 'match', 'method', 'mod',
44+
'module', 'mutable', 'new', 'not', 'of', 'open', 'option', 'or', 'parser',
45+
'private', 'raise', 'rec', 'ref', 'regexp', 'sig', 'stderr', 'stdin', 'stdout',
46+
'struct', 'then', 'to', 'true', 'try', 'type', 'val', 'virtual', 'when', 'while',
47+
'with'
48+
})
49+
50+
lex:set_word_list(lexer.TYPE, {
51+
'bool', 'char', 'float', 'int', 'list', 'string', 'unit'
52+
})
53+
54+
lex:set_word_list(lexer.FUNCTION, {
2855
'abs', 'abs_float', 'acos', 'asin', 'atan', 'atan2', 'at_exit', 'bool_of_string', 'ceil',
2956
'char_of_int', 'classify_float', 'close_in', 'close_in_noerr', 'close_out', 'close_out_noerr',
3057
'compare', 'cos', 'cosh', 'decr', 'epsilon_float', 'exit', 'exp', 'failwith', 'float',
@@ -41,25 +68,5 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match{
4168
'seek_out', 'set_binary_mode_in', 'set_binary_mode_out', 'sin', 'sinh', 'snd', 'sqrt', 'stderr',
4269
'stdin', 'stdout', 'string_of_bool', 'string_of_float', 'string_of_format', 'string_of_int',
4370
'succ', 'tan', 'tanh', 'truncate'
44-
}))
45-
46-
-- Identifiers.
47-
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
48-
49-
-- Strings.
50-
local sq_str = lexer.range("'", true)
51-
local dq_str = lexer.range('"', true)
52-
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
53-
54-
-- Comments.
55-
lex:add_rule('comment', token(lexer.COMMENT, lexer.range('(*', '*)', false, false, true)))
56-
57-
-- Numbers.
58-
lex:add_rule('number', token(lexer.NUMBER, lexer.number))
59-
60-
-- Operators.
61-
lex:add_rule('operator', token(lexer.OPERATOR, S('=<>+-*/.,:;~!#%^&|?[](){}')))
62-
63-
lexer.property['scintillua.comment'] = '(*|*)'
64-
71+
})
6572
return lex

0 commit comments

Comments
 (0)