Commit 4eb1c43

Add Lexer, Token, and lexer_test
- TestNextToken() reads in an input string and checks that each token gets assigned the right type and literal
- Token defines the Token struct and the initial token constants: identifiers + literals, operators, delimiters, keywords
- Lexer converts source code to tokens: New(), readChar(), NextToken(), newToken()
1 parent e983324 commit 4eb1c43

File tree: 5 files changed (+140, -0 lines)

.envrc (+1)

export GOPATH=$(pwd)
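Note (not stated in the commit): a file named .envrc is conventionally read by the direnv tool, so this presumably exports GOPATH pointing at the repository root whenever the directory is entered. Under the pre-modules GOPATH layout, that is what lets imports like "monkey/token" resolve from the src/ tree.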

pkg/darwin_amd64/monkey/token.a (3.87 KB, binary file not shown)

src/monkey/lexer/lexer.go (+71)

package lexer

import "monkey/token"

type Lexer struct {
    input        string
    position     int  // current position in input (points to current char)
    readPosition int  // next reading position in input
    ch           byte // current char

    // Both position and readPosition are used to access characters in input
    // by using them as an index, e.g. l.input[l.readPosition]. We need two
    // "pointers" into the input string because we have to "peek" ahead to see
    // what comes after the current character: readPosition always points to
    // the "next" input character, while position points to the character
    // currently held in ch.
}

func New(input string) *Lexer {
    l := &Lexer{input: input}
    l.readChar()
    return l
}

// readChar advances to the next character in the input.
// It only supports ASCII characters, not full UTF-8.
func (l *Lexer) readChar() {
    // At the end of the input, set ch = 0 (NUL), which stands for
    // "nothing read yet" or EOF.
    if l.readPosition >= len(l.input) {
        l.ch = 0
    } else {
        l.ch = l.input[l.readPosition] // next character
    }
    l.position = l.readPosition
    l.readPosition += 1
}

// NextToken returns the token for the current character and advances the lexer.
func (l *Lexer) NextToken() token.Token {
    var tok token.Token

    switch l.ch {
    case '=':
        tok = newToken(token.ASSIGN, l.ch)
    case ';':
        tok = newToken(token.SEMICOLON, l.ch)
    case '(':
        tok = newToken(token.LPAREN, l.ch)
    case ')':
        tok = newToken(token.RPAREN, l.ch)
    case ',':
        tok = newToken(token.COMMA, l.ch)
    case '+':
        tok = newToken(token.PLUS, l.ch)
    case '{':
        tok = newToken(token.LBRACE, l.ch)
    case '}':
        tok = newToken(token.RBRACE, l.ch)
    case 0:
        tok.Literal = ""
        tok.Type = token.EOF
    }

    l.readChar()
    return tok
}

func newToken(tokenType token.TokenType, ch byte) token.Token {
    return token.Token{Type: tokenType, Literal: string(ch)}
}
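For context, a minimal sketch of how this lexer would be driven (not part of this commit; the input string and the loop are purely illustrative):

package main

import (
    "fmt"

    "monkey/lexer"
    "monkey/token"
)

func main() {
    l := lexer.New("=+(){},;")

    // Keep pulling tokens until the lexer reports EOF.
    for tok := l.NextToken(); tok.Type != token.EOF; tok = l.NextToken() {
        fmt.Printf("%+v\n", tok)
    }
}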

src/monkey/lexer/lexer_test.go (+35)

package lexer

import (
    "monkey/token"
    "testing"

    "github.com/stretchr/testify/assert"
)

func TestNextToken(t *testing.T) {
    input := `=+(){},;`

    tests := []struct {
        expectedType    token.TokenType
        expectedLiteral string
    }{
        {token.ASSIGN, "="},
        {token.PLUS, "+"},
        {token.LPAREN, "("},
        {token.RPAREN, ")"},
        {token.LBRACE, "{"},
        {token.RBRACE, "}"},
        {token.COMMA, ","},
        {token.SEMICOLON, ";"},
        {token.EOF, ""},
    }

    l := New(input)

    for _, test := range tests {
        tok := l.NextToken()

        assert.Equal(t, test.expectedType, tok.Type)
        assert.Equal(t, test.expectedLiteral, tok.Literal)
    }
}
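Note (not part of the commit): with GOPATH pointed at the repository root by the .envrc above, this test should run via `go test monkey/lexer`, once the testify dependency has been fetched into the workspace, e.g. with `go get github.com/stretchr/testify/assert`.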

src/monkey/token/token.go (+33)

package token

type TokenType string

type Token struct {
    Type    TokenType
    Literal string
}

const (
    ILLEGAL = "ILLEGAL" // a token/character not on the list
    EOF     = "EOF"     // parser stops here

    // Identifiers + literals
    IDENT = "IDENT" // variable names
    INT   = "INT"

    // Operators
    ASSIGN = "="
    PLUS   = "+"

    // Delimiters
    COMMA     = ","
    SEMICOLON = ";"
    LPAREN    = "("
    RPAREN    = ")"
    LBRACE    = "{"
    RBRACE    = "}"

    // Keywords
    FUNCTION = "FUNCTION"
    LET      = "LET"
)
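Since the constants above are untyped string constants, they can be used anywhere a TokenType is expected. A tiny illustration (not from the commit) of building a token by hand:

tok := token.Token{Type: token.LET, Literal: "let"}
// tok.Type == "LET", tok.Literal == "let"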
