diff --git a/core/lexer.cpp b/core/lexer.cpp
index 31599f4d7..205b509a9 100644
--- a/core/lexer.cpp
+++ b/core/lexer.cpp
@@ -217,9 +217,11 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
// https://www.json.org/img/number.png
// Note, we deviate from the json.org documentation as follows:
- // There is no reason to lex negative numbers as atomic tokens, it is better to parse them
- // as a unary operator combined with a numeric literal. This avoids x-1 being tokenized as
- // <identifier> <number> instead of the intended <identifier> <binop> <number>.
+ // * There is no reason to lex negative numbers as atomic tokens, it is better to parse them
+ // as a unary operator combined with a numeric literal. This avoids x-1 being tokenized as
+ // <identifier> <number> instead of the intended <identifier> <binop> <number>.
+ // * We support digit separators using the _ character for readability in
+ // large numeric literals.
enum State {
BEGIN,
@@ -227,9 +229,11 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
AFTER_ONE_TO_NINE,
AFTER_DOT,
AFTER_DIGIT,
+ AFTER_UNDERSCORE,
AFTER_E,
AFTER_EXP_SIGN,
- AFTER_EXP_DIGIT
+ AFTER_EXP_DIGIT,
+ AFTER_EXP_UNDERSCORE
} state;
std::string r;
@@ -262,6 +266,8 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
case 'e':
case 'E': state = AFTER_E; break;
+ case '_': state = AFTER_UNDERSCORE; goto skip_char;
+
default: goto end;
}
break;
@@ -284,6 +290,8 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
case '8':
case '9': state = AFTER_ONE_TO_NINE; break;
+ case '_': state = AFTER_UNDERSCORE; goto skip_char;
+
default: goto end;
}
break;
@@ -325,10 +333,34 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
case '8':
case '9': state = AFTER_DIGIT; break;
+ case '_': state = AFTER_UNDERSCORE; goto skip_char;
+
default: goto end;
}
break;
+ case AFTER_UNDERSCORE:
+ switch (*c) {
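+ // Only a digit may follow '_'. NOTE(review): this always resumes in AFTER_ONE_TO_NINE, even when the '_' was reached from AFTER_DIGIT (fraction part), so '.' looks acceptable again - confirm that input like 1.1_2.3 is not lexed as a single number.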
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': state = AFTER_ONE_TO_NINE; break;
+
+ default: {
+ std::stringstream ss;
+ ss << "couldn't lex number, junk after _: " << *c;
+ throw StaticError(filename, begin, ss.str());
+ }
+ }
+ break;
+
case AFTER_E:
switch (*c) {
case '+':
@@ -386,12 +418,38 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
case '7':
case '8':
case '9': state = AFTER_EXP_DIGIT; break;
+
+ case '_': state = AFTER_EXP_UNDERSCORE; goto skip_char;
default: goto end;
}
break;
+
+ case AFTER_EXP_UNDERSCORE:
+ switch (*c) {
+ // The only valid transition from _ is to a digit.
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': state = AFTER_EXP_DIGIT; break;
+
+ default: {
+ std::stringstream ss;
+ ss << "couldn't lex number, junk after _: " << *c;
+ throw StaticError(filename, begin, ss.str());
+ }
+ }
+ break;
}
r += *c;
+
+skip_char:
c++;
}
end:
diff --git a/core/lexer_test.cpp b/core/lexer_test.cpp
index 5b843e580..e1fba13c3 100644
--- a/core/lexer_test.cpp
+++ b/core/lexer_test.cpp
@@ -117,6 +117,55 @@ TEST(Lexer, TestNumbers)
"number 1e+!:1:1: couldn't lex number, junk after exponent sign: !");
}
+TEST(Lexer, TestNumbersWithSeparators)
+{
+ testLex("number 123_456", "123_456", {Token(Token::Kind::NUMBER, "123456")}, "");
+ testLex("number 1_750_000", "1_750_000", {Token(Token::Kind::NUMBER, "1750000")}, "");
+ testLex("number 1_2_3", "1_2_3", {Token(Token::Kind::NUMBER, "123")}, "");
+ testLex("number 3.141_592", "3.141_592", {Token(Token::Kind::NUMBER, "3.141592")}, "");
+ testLex("number 01_100", "01_100", {Token(Token::Kind::NUMBER, "0"), Token(Token::Kind::NUMBER, "1100")}, "");  // expected: the leading 0 is its own token, 1_100 a second number
+ testLex("number 1_200.0", "1_200.0", {Token(Token::Kind::NUMBER, "1200.0")}, "");
+ testLex("number 0e1_01", "0e1_01", {Token(Token::Kind::NUMBER, "0e101")}, "");
+ testLex("number 10_10e3", "10_10e3", {Token(Token::Kind::NUMBER, "1010e3")}, "");
+ testLex("number 2_3e1_2", "2_3e1_2", {Token(Token::Kind::NUMBER, "23e12")}, "");
+ testLex("number 1.1_2e100", "1.1_2e100", {Token(Token::Kind::NUMBER, "1.12e100")}, "");
+ testLex("number 1.1e-10_1", "1.1e-10_1", {Token(Token::Kind::NUMBER, "1.1e-101")}, "");
+ testLex("number 9.109_383_56e-31", "9.109_383_56e-31", {Token(Token::Kind::NUMBER, "9.10938356e-31")}, "");
+ // The cases above expect token text with every '_' removed; the cases below expect a lex error for a '_' that is trailing, doubled, or adjacent to '.', 'e' or an exponent sign.
+ testLex("number 123456_!",
+ "123456_!",
+ {},
+ "number 123456_!:1:1: couldn't lex number, junk after _: !");
+ testLex("number 123__456",
+ "123__456",
+ {},
+ "number 123__456:1:1: couldn't lex number, junk after _: _");
+ testLex("number 1_200_.0",
+ "1_200_.0",
+ {},
+ "number 1_200_.0:1:1: couldn't lex number, junk after _: .");
+ testLex("number 1_200._0",
+ "1_200._0",
+ {},
+ "number 1_200._0:1:1: couldn't lex number, junk after decimal point: _");
+ testLex("number 1_200_e2",
+ "1_200_e2",
+ {},
+ "number 1_200_e2:1:1: couldn't lex number, junk after _: e");
+ testLex("number 1_200e_2",
+ "1_200e_2",
+ {},
+ "number 1_200e_2:1:1: couldn't lex number, junk after 'E': _");
+ testLex("number 200e-_2",
+ "200e-_2",
+ {},
+ "number 200e-_2:1:1: couldn't lex number, junk after exponent sign: _");
+ testLex("number 200e+_2",
+ "200e+_2",
+ {},
+ "number 200e+_2:1:1: couldn't lex number, junk after exponent sign: _");
+}
+
TEST(Lexer, TestDoubleStrings)
{
testLex("double string \"hi\"", "\"hi\"", {Token(Token::Kind::STRING_DOUBLE, "hi")}, "");
@@ -328,6 +377,7 @@ TEST(Lexer, TestIdentifier)
"foo bar123",
{Token(Token::Kind::IDENTIFIER, "foo"), Token(Token::Kind::IDENTIFIER, "bar123")},
"");
+ testLex("identifier _123", "_123", {Token(Token::Kind::IDENTIFIER, "_123")}, "");
}
TEST(Lexer, TestComments)
diff --git a/doc/_includes/examples/syntax.jsonnet b/doc/_includes/examples/syntax.jsonnet
index eb1ab0d21..8a5288e6c 100644
--- a/doc/_includes/examples/syntax.jsonnet
+++ b/doc/_includes/examples/syntax.jsonnet
@@ -3,21 +3,6 @@
{
cocktails: {
// Ingredient quantities are in fl oz.
- 'Tom Collins': {
- ingredients: [
- { kind: "Farmer's Gin", qty: 1.5 },
- { kind: 'Lemon', qty: 1 },
- { kind: 'Simple Syrup', qty: 0.5 },
- { kind: 'Soda', qty: 2 },
- { kind: 'Angostura', qty: 'dash' },
- ],
- garnish: 'Maraschino Cherry',
- served: 'Tall',
- description: |||
- The Tom Collins is essentially gin and
- lemonade. The bitters add complexity.
- |||,
- },
Manhattan: {
ingredients: [
{ kind: 'Rye', qty: 2.5 },
@@ -28,5 +13,19 @@
served: 'Straight Up',
description: @'A clear \ red drink.',
},
+ 'Trinidad Sour': {
+ ingredients: [
+ { kind: 'Angostura bitters', qty: 1.333_333 },
+ { kind: 'Rye whiskey', qty: 0.5 },
+ { kind: 'Fresh lemon juice', qty: 0.75 },
+ { kind: 'Orgeat syrup', qty: 1 },
+ ],
+ garnish: 'Lemon twist',
+ served: 'chilled Nick & Nora glass',
+ description: |||
+ Boldly balanced: 1 1/3 oz Angostura
+ transforms bitters into the star spirit.
+ |||,
+ },
},
}
diff --git a/doc/_includes/examples/syntax.jsonnet.golden b/doc/_includes/examples/syntax.jsonnet.golden
index 6108519fd..1e9c7b5bf 100644
--- a/doc/_includes/examples/syntax.jsonnet.golden
+++ b/doc/_includes/examples/syntax.jsonnet.golden
@@ -19,32 +19,28 @@
],
"served": "Straight Up"
},
- "Tom Collins": {
- "description": "The Tom Collins is essentially gin and\nlemonade. The bitters add complexity.\n",
- "garnish": "Maraschino Cherry",
+ "Trinidad Sour": {
+ "description": "Boldly balanced: 1 1/3 oz Angostura\ntransforms bitters into the star spirit.\n",
+ "garnish": "Lemon twist",
"ingredients": [
{
- "kind": "Farmer's Gin",
- "qty": 1.5
- },
- {
- "kind": "Lemon",
- "qty": 1
+ "kind": "Angostura bitters",
+ "qty": 1.333333
},
{
- "kind": "Simple Syrup",
+ "kind": "Rye whiskey",
"qty": 0.5
},
{
- "kind": "Soda",
- "qty": 2
+ "kind": "Fresh lemon juice",
+ "qty": 0.75
},
{
- "kind": "Angostura",
- "qty": "dash"
+ "kind": "Orgeat syrup",
+ "qty": 1
}
],
- "served": "Tall"
+ "served": "chilled Nick & Nora glass"
}
}
}
diff --git a/doc/learning/tutorial.html b/doc/learning/tutorial.html
index 3ac5520bf..aa9b4927e 100644
--- a/doc/learning/tutorial.html
+++ b/doc/learning/tutorial.html
@@ -60,6 +60,9 @@ Syntax
Verbatim strings @'foo'
and @"foo"
are for single lines.
+
+ Large numeric literals may be rendered more readable by using underscores, e.g. <code>1_000_000</code>.
+
Using the interactive demo below, try modifying the strings / quantities. Try adding a "Dry
diff --git a/doc/ref/spec.html b/doc/ref/spec.html
index 9cfd6c01b..0338ca624 100644
--- a/doc/ref/spec.html
+++ b/doc/ref/spec.html
@@ -142,8 +142,19 @@
Lexing
- number: As defined by JSON but without the leading
- minus.
+ number: As defined by JSON, with two exceptions:
+
+ -
+ Numeric literals may be rendered with underscores (<code>_</code>) between any two adjacent 0-9 digits
+ to improve readability. The underscores are discarded by the lexer.
+
+ Examples: <code>1_000_000</code>, <code>0.000_001</code>, <code>6.022_140_76e23</code>
+
+ -
+ Negative numbers are lexed as the <code>-</code> unary operator applied to a positive number to
+ simplify parsing.
+
+
diff --git a/test_suite/digitsep.jsonnet b/test_suite/digitsep.jsonnet
new file mode 100644
index 000000000..5237198ce
--- /dev/null
+++ b/test_suite/digitsep.jsonnet
@@ -0,0 +1,19 @@
+local cases = [
+ [123_456, "123_456"],
+ [1_750_000, "1_750_000"],
+ [1_2_3, "1_2_3"],
+ [3.141_592, "3.141_592"],
+ [1_200.0, "1_200.0"],
+ [0e1_01, "0e1_01"],
+ [10_10e3, "10_10e3"],
+ [2_3e1_2, "2_3e1_2"],
+ [1.1_2e100, "1.1_2e100"],
+ [1.1e-10_1, "1.1e-10_1"],
+ [9.109_383_56e-31, "9.109_383_56e-31"],
+];
+
+local sepParse(s) = std.parseJson(std.strReplace(s, "_", ""));
+
+{
+ test_results: [std.assertEqual(c[0], sepParse(c[1])) for c in cases],
+}
diff --git a/test_suite/digitsep.jsonnet.golden b/test_suite/digitsep.jsonnet.golden
new file mode 100644
index 000000000..e9795a491
--- /dev/null
+++ b/test_suite/digitsep.jsonnet.golden
@@ -0,0 +1,15 @@
+{
+ "test_results": [
+ true,
+ true,
+ true,
+ true,
+ true,
+ true,
+ true,
+ true,
+ true,
+ true,
+ true
+ ]
+}
diff --git a/test_suite/error.std_parseJson.nodigitsep.jsonnet b/test_suite/error.std_parseJson.nodigitsep.jsonnet
new file mode 100644
index 000000000..f50d4f63b
--- /dev/null
+++ b/test_suite/error.std_parseJson.nodigitsep.jsonnet
@@ -0,0 +1 @@
+std.parseJson("987_543")
diff --git a/test_suite/error.std_parseJson.nodigitsep.jsonnet.golden b/test_suite/error.std_parseJson.nodigitsep.jsonnet.golden
new file mode 100644
index 000000000..4ccf27d5f
--- /dev/null
+++ b/test_suite/error.std_parseJson.nodigitsep.jsonnet.golden
@@ -0,0 +1,2 @@
+RUNTIME ERROR: [json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: '987_'; expected end of input
+ error.std_parseJson.nodigitsep.jsonnet:1:1-25