Commit 4eb1c43

Add Lexer, Token, and lexer_test
- TestNextToken() reads in an input string and checks that each token gets assigned the right type and literal
- Token defines the Token struct and the initial token constants: identifiers + literals, operators, delimiters, keywords
- Lexer converts source code to tokens: New(), readChar(), NextToken(), newToken()
1 parent e983324 commit 4eb1c43

File tree: 5 files changed (+140, -0 lines)

.envrc (+1)

export GOPATH=$(pwd)
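Note (not stated in the commit): a file named .envrc is conventionally read by the direnv tool, so this presumably exports GOPATH pointing at the repository root whenever the directory is entered. Under the pre-modules GOPATH layout, that is what lets imports like "monkey/token" resolve from the src/ tree.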

pkg/darwin_amd64/monkey/token.a (3.87 KB, binary file not shown)

src/monkey/lexer/lexer.go (+71)

package lexer

import "monkey/token"

type Lexer struct {
    input        string
    position     int  // current position in input (points to current char)
    readPosition int  // next reading position in input
    ch           byte // current char

    // Both position and readPosition are used to access characters in input
    // by using them as an index, e.g. l.input[l.readPosition]. We need two
    // "pointers" into the input string because we have to "peek" ahead to see
    // what comes after the current character: readPosition always points to
    // the "next" input character, while position points to the character
    // currently held in ch.
}

func New(input string) *Lexer {
    l := &Lexer{input: input}
    l.readChar()
    return l
}

// readChar advances to the next character in the input.
// It only supports ASCII characters, not full UTF-8.
func (l *Lexer) readChar() {
    // At the end of the input, set ch = 0 (NUL), which stands for
    // "nothing read yet" or EOF.
    if l.readPosition >= len(l.input) {
        l.ch = 0
    } else {
        l.ch = l.input[l.readPosition] // next character
    }
    l.position = l.readPosition
    l.readPosition += 1
}

// NextToken returns the token for the current character and advances the lexer.
func (l *Lexer) NextToken() token.Token {
    var tok token.Token

    switch l.ch {
    case '=':
        tok = newToken(token.ASSIGN, l.ch)
    case ';':
        tok = newToken(token.SEMICOLON, l.ch)
    case '(':
        tok = newToken(token.LPAREN, l.ch)
    case ')':
        tok = newToken(token.RPAREN, l.ch)
    case ',':
        tok = newToken(token.COMMA, l.ch)
    case '+':
        tok = newToken(token.PLUS, l.ch)
    case '{':
        tok = newToken(token.LBRACE, l.ch)
    case '}':
        tok = newToken(token.RBRACE, l.ch)
    case 0:
        tok.Literal = ""
        tok.Type = token.EOF
    }

    l.readChar()
    return tok
}

func newToken(tokenType token.TokenType, ch byte) token.Token {
    return token.Token{Type: tokenType, Literal: string(ch)}
}
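For context, a minimal sketch of how this lexer would be driven (not part of this commit; the input string and the loop are purely illustrative):

package main

import (
    "fmt"

    "monkey/lexer"
    "monkey/token"
)

func main() {
    l := lexer.New("=+(){},;")

    // Keep pulling tokens until the lexer reports EOF.
    for tok := l.NextToken(); tok.Type != token.EOF; tok = l.NextToken() {
        fmt.Printf("%+v\n", tok)
    }
}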

src/monkey/lexer/lexer_test.go (+35)

package lexer

import (
    "monkey/token"
    "testing"

    "github.com/stretchr/testify/assert"
)

func TestNextToken(t *testing.T) {
    input := `=+(){},;`

    tests := []struct {
        expectedType    token.TokenType
        expectedLiteral string
    }{
        {token.ASSIGN, "="},
        {token.PLUS, "+"},
        {token.LPAREN, "("},
        {token.RPAREN, ")"},
        {token.LBRACE, "{"},
        {token.RBRACE, "}"},
        {token.COMMA, ","},
        {token.SEMICOLON, ";"},
        {token.EOF, ""},
    }

    l := New(input)

    for _, test := range tests {
        tok := l.NextToken()

        assert.Equal(t, test.expectedType, tok.Type)
        assert.Equal(t, test.expectedLiteral, tok.Literal)
    }
}
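Note (not part of the commit): with GOPATH pointed at the repository root by the .envrc above, this test should run via `go test monkey/lexer`, once the testify dependency has been fetched into the workspace, e.g. with `go get github.com/stretchr/testify/assert`.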

src/monkey/token/token.go (+33)

package token

type TokenType string

type Token struct {
    Type    TokenType
    Literal string
}

const (
    ILLEGAL = "ILLEGAL" // a token/character not on the list
    EOF     = "EOF"     // parser stops here

    // Identifiers + literals
    IDENT = "IDENT" // variable names
    INT   = "INT"

    // Operators
    ASSIGN = "="
    PLUS   = "+"

    // Delimiters
    COMMA     = ","
    SEMICOLON = ";"
    LPAREN    = "("
    RPAREN    = ")"
    LBRACE    = "{"
    RBRACE    = "}"

    // Keywords
    FUNCTION = "FUNCTION"
    LET      = "LET"
)
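Since the constants above are untyped string constants, they can be used anywhere a TokenType is expected. A tiny illustration (not from the commit) of building a token by hand:

tok := token.Token{Type: token.LET, Literal: "let"}
// tok.Type == "LET", tok.Literal == "let"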
