Skip to content

Commit 5c263c0

Browse files
authored
Merge pull request #136 from link-foundation/issue-135-00fb7516
Fix indentation consistency for all languages (#135)
2 parents 5d1fa51 + 7167436 commit 5c263c0

File tree

20 files changed

+1227
-169
lines changed

20 files changed

+1227
-169
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,5 +333,6 @@ ASALocalRun/
333333
.DS_Store
334334

335335
# rust
336+
rust/target/
336337
target/venv/
337338
.venv/
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
using Xunit;
2+
using System.Collections.Generic;
3+
4+
namespace Link.Foundation.Links.Notation.Tests
5+
{
6+
public class IndentationConsistencyTests
7+
{
8+
[Fact]
9+
public void LeadingSpacesVsNoLeadingSpacesShouldProduceSameResult()
10+
{
11+
// Example with 2 leading spaces (from issue #135)
12+
var withLeading = @" TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'
13+
TELEGRAM_ALLOWED_CHATS:
14+
-1002975819706
15+
-1002861722681
16+
TELEGRAM_HIVE_OVERRIDES:
17+
--all-issues
18+
--once
19+
TELEGRAM_BOT_VERBOSE: true";
20+
21+
// Example without leading spaces (from issue #135)
22+
var withoutLeading = @"TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'
23+
TELEGRAM_ALLOWED_CHATS:
24+
-1002975819706
25+
-1002861722681
26+
TELEGRAM_HIVE_OVERRIDES:
27+
--all-issues
28+
--once
29+
TELEGRAM_BOT_VERBOSE: true";
30+
31+
var resultWith = new Parser().Parse(withLeading);
32+
var resultWithout = new Parser().Parse(withoutLeading);
33+
34+
// Compare the entire formatted output (complete round trip test)
35+
Assert.Equal(resultWithout.Format(), resultWith.Format());
36+
}
37+
38+
[Fact]
39+
public void TwoSpacesVsFourSpacesIndentation()
40+
{
41+
// Example with 2 spaces per level
42+
var twoSpaces = @"TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'
43+
TELEGRAM_ALLOWED_CHATS:
44+
-1002975819706
45+
-1002861722681
46+
TELEGRAM_HIVE_OVERRIDES:
47+
--all-issues
48+
--once
49+
--auto-fork
50+
--skip-issues-with-prs
51+
--attach-logs
52+
--verbose
53+
--no-tool-check
54+
TELEGRAM_SOLVE_OVERRIDES:
55+
--auto-fork
56+
--auto-continue
57+
--attach-logs
58+
--verbose
59+
--no-tool-check
60+
TELEGRAM_BOT_VERBOSE: true";
61+
62+
// Example with 4 spaces per level
63+
var fourSpaces = @"TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'
64+
TELEGRAM_ALLOWED_CHATS:
65+
-1002975819706
66+
-1002861722681
67+
TELEGRAM_HIVE_OVERRIDES:
68+
--all-issues
69+
--once
70+
--auto-fork
71+
--skip-issues-with-prs
72+
--attach-logs
73+
--verbose
74+
--no-tool-check
75+
TELEGRAM_SOLVE_OVERRIDES:
76+
--auto-fork
77+
--auto-continue
78+
--attach-logs
79+
--verbose
80+
--no-tool-check
81+
TELEGRAM_BOT_VERBOSE: true";
82+
83+
var resultTwo = new Parser().Parse(twoSpaces);
84+
var resultFour = new Parser().Parse(fourSpaces);
85+
86+
// Compare the entire formatted output (complete round trip test)
87+
Assert.Equal(resultFour.Format(), resultTwo.Format());
88+
}
89+
90+
[Fact]
91+
public void SimpleTwoVsFourSpacesIndentation()
92+
{
93+
// Simple example with 2 spaces
94+
var twoSpaces = @"parent:
95+
child1
96+
child2";
97+
98+
// Simple example with 4 spaces
99+
var fourSpaces = @"parent:
100+
child1
101+
child2";
102+
103+
var resultTwo = new Parser().Parse(twoSpaces);
104+
var resultFour = new Parser().Parse(fourSpaces);
105+
106+
// Compare the entire formatted output (complete round trip test)
107+
Assert.Equal(resultFour.Format(), resultTwo.Format());
108+
}
109+
110+
[Fact]
111+
public void ThreeLevelNestingWithDifferentIndentation()
112+
{
113+
// Three levels with 2 spaces
114+
var twoSpaces = @"level1:
115+
level2:
116+
level3a
117+
level3b
118+
level2b";
119+
120+
// Three levels with 4 spaces
121+
var fourSpaces = @"level1:
122+
level2:
123+
level3a
124+
level3b
125+
level2b";
126+
127+
var resultTwo = new Parser().Parse(twoSpaces);
128+
var resultFour = new Parser().Parse(fourSpaces);
129+
130+
// Compare the entire formatted output (complete round trip test)
131+
Assert.Equal(resultFour.Format(), resultTwo.Format());
132+
}
133+
}
134+
}

csharp/Link.Foundation.Links.Notation/Parser.peg

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
@namespace Link.Foundation.Links.Notation
22
@classname Parser
33
@using System.Linq
4-
document <IList<Link<string>>> = #{ state["IndentationStack"] = new Stack<int>(); state["IndentationStack"].Push(0); } _ l:links eof { l.ToLinksList() } / #{ state["IndentationStack"] = new Stack<int>(); state["IndentationStack"].Push(0); } _ eof { new List<Link<string>>() }
4+
document <IList<Link<string>>> = #{ state["IndentationStack"] = new Stack<int>(); state["IndentationStack"].Push(0); state["BaseIndentation"] = -1; } skipEmptyLines l:links _ eof { l.ToLinksList() } / #{ state["IndentationStack"] = new Stack<int>(); state["IndentationStack"].Push(0); state["BaseIndentation"] = -1; } _ eof { new List<Link<string>>() }
5+
skipEmptyLines = ([ \t]* [\r\n])*
56
links <IList<LinksGroup<string>>> = fl:firstLine list:line* POP_INDENTATION { new List<LinksGroup<string>> { fl }.Concat(list).ToList() }
6-
firstLine <LinksGroup<string>> = l:element { l }
7+
firstLine <LinksGroup<string>> = SET_BASE_INDENTATION l:element { l }
78
line <LinksGroup<string>> = CHECK_INDENTATION l:element { l }
89
element <LinksGroup<string>> = e:anyLink PUSH_INDENTATION l:links { new LinksGroup<string>(e, l) } / e:anyLink { new LinksGroup<string>(e) }
910
referenceOrLink <Link<string>> = l:multiLineAnyLink { l } / i:reference { i }
@@ -20,13 +21,14 @@ singleLineValueLink <Link<string>> = v:singleLineValues { new Link<string>(v) }
2021
multiLineValueLink <Link<string>> = "(" v:multiLineValues _ ")" { new Link<string>(v) }
2122
indentedIdLink <Link<string>> = id:(reference) __ ":" eol { new Link<string>(id) }
2223

23-
reference <string> = doubleQuotedReference / singleQuotedReference / simpleReference
24+
reference <string> = doubleQuotedReference / singleQuotedReference / simpleReference
2425
simpleReference <string> = "" referenceSymbol+
2526
doubleQuotedReference <string> = '"' r:([^"]+) '"' { string.Join("", r) }
2627
singleQuotedReference <string> = "'" r:([^']+) "'" { string.Join("", r) }
27-
PUSH_INDENTATION = spaces:" "* &{ spaces.Count > state["IndentationStack"].Peek() } #{ state["IndentationStack"].Push(spaces.Count); }
28+
SET_BASE_INDENTATION = spaces:" "* #{ if ((int)state["BaseIndentation"] == -1) state["BaseIndentation"] = spaces.Count; }
29+
PUSH_INDENTATION = spaces:" "* #{ state["NormalizedIndent"] = spaces.Count - ((int)state["BaseIndentation"] == -1 ? 0 : (int)state["BaseIndentation"]); if ((int)state["NormalizedIndent"] < 0) state["NormalizedIndent"] = 0; } &{ (int)state["NormalizedIndent"] > (int)state["IndentationStack"].Peek() } #{ state["IndentationStack"].Push((int)state["NormalizedIndent"]); }
2830
POP_INDENTATION = #{ state["IndentationStack"].Pop(); }
29-
CHECK_INDENTATION = spaces:" "* &{ spaces.Count >= state["IndentationStack"].Peek() }
31+
CHECK_INDENTATION = spaces:" "* #{ state["NormalizedIndent"] = spaces.Count - ((int)state["BaseIndentation"] == -1 ? 0 : (int)state["BaseIndentation"]); if ((int)state["NormalizedIndent"] < 0) state["NormalizedIndent"] = 0; } &{ (int)state["NormalizedIndent"] >= (int)state["IndentationStack"].Peek() }
3032
eol = __ ("" [\r\n]+ / eof)
3133
eof = !.
3234
__ = [ \t]*

experiments/SOLUTION_SUMMARY.md

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# Solution Summary for Issue #135
2+
3+
## Problem Statement
4+
5+
The parser was treating documents with leading spaces differently than documents without leading spaces, even when the relative indentation was the same.
6+
7+
### Example of the Bug:
8+
These two should parse identically, but didn't:
9+
10+
```yaml
11+
TELEGRAM_BOT_TOKEN: 'value'
12+
TELEGRAM_ALLOWED_CHATS:
13+
item1
14+
item2
15+
```
16+
17+
```yaml
18+
TELEGRAM_BOT_TOKEN: 'value'
19+
TELEGRAM_ALLOWED_CHATS:
20+
item1
21+
item2
22+
```
23+
24+
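In the first example, the parser incorrectly treated `TELEGRAM_ALLOWED_CHATS` as a child of `TELEGRAM_BOT_TOKEN`: both lines had 2 leading spaces, but the first line's leading spaces were skipped before its indentation was measured, so the second line appeared to be indented deeper than the first instead of at the same level.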
25+
26+
## Root Cause
27+
28+
All parsers were counting **absolute** indentation (number of spaces from the start of the line) instead of **relative** indentation (increase/decrease compared to the parent level).
29+
30+
## Solution
31+
32+
The fix normalizes indentation by:
33+
1. Detecting the first content line's indentation and treating it as the baseline (level 0)
34+
2. Subtracting this baseline from all subsequent lines
35+
3. This makes the indentation **relative** to the first content line
36+
37+
### Implementation Details
38+
39+
#### Rust (`rust/src/parser.rs`)
40+
- Added `base_indentation` field to `ParserState`
41+
- Added `set_base_indentation()`, `get_base_indentation()`, and `normalize_indentation()` methods
42+
- Modified `first_line()` to capture and set the base indentation
43+
- Updated `push_indentation()` and `check_indentation()` to normalize values before comparison
44+
45+
#### JavaScript (`js/src/grammar.pegjs`)
46+
- Added `baseIndentation` variable to track the first line's indentation
47+
- Added `setBaseIndentation()` and `normalizeIndentation()` functions
48+
- Updated `document` rule to skip only empty lines (not leading spaces on content lines)
49+
- Added `SET_BASE_INDENTATION` rule called from `firstLine`
50+
- Modified `PUSH_INDENTATION` and `CHECK_INDENTATION` to use normalized values
51+
52+
#### C# (`csharp/Link.Foundation.Links.Notation/Parser.peg`)
53+
- Added `BaseIndentation` to parser state
54+
- Added `skipEmptyLines` rule to preserve leading spaces on first content line
55+
- Added `SET_BASE_INDENTATION` rule
56+
- Updated `PUSH_INDENTATION` and `CHECK_INDENTATION` to normalize indentation
57+
58+
#### Python (TODO)
59+
- Needs similar changes to `python/links_notation/parser.py`
60+
- Should track `base_indentation` in the Parser class
61+
- Update `_parse_element()` to normalize indentation values
62+
63+
## Test Coverage
64+
65+
Added test cases for each language fixed so far (Python is still pending):
66+
- `rust/tests/indentation_consistency_tests.rs`
67+
- `js/tests/IndentationConsistency.test.js`
68+
- `csharp/Link.Foundation.Links.Notation.Tests/IndentationConsistencyTests.cs`
69+
70+
Each test suite verifies:
71+
1. Documents with leading spaces vs no leading spaces produce identical results
72+
2. Different indentation sizes (2 vs 4 spaces) work correctly
73+
3. Multi-level nesting preserves structure regardless of indentation style
74+
75+
## Results
76+
77+
✅ **Rust**: All tests passing (106 tests)
78+
✅ **JavaScript**: All tests passing (106 tests)
79+
🔧 **C#**: Fixed, tests pending build verification
80+
⏳ **Python**: Implementation pending
81+
82+
## Verification
83+
84+
Run tests:
85+
```bash
86+
# Rust
87+
cd rust && cargo test
88+
89+
# JavaScript
90+
cd js && npm test
91+
92+
# C#
93+
cd csharp && dotnet test
94+
95+
# Python
96+
cd python && python -m pytest
97+
```
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Indentation Consistency Test
2+
3+
This document contains test cases for issue #135: any indentation, as long as it is consistent within a single level, should not change parser semantics.
4+
5+
## Test Case 1: Two spaces vs Four spaces
6+
7+
Both of these examples should parse to exactly the same result:
8+
9+
### Example with 2 spaces per level:
10+
```
11+
TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'
12+
TELEGRAM_ALLOWED_CHATS:
13+
-1002975819706
14+
-1002861722681
15+
TELEGRAM_HIVE_OVERRIDES:
16+
--all-issues
17+
--once
18+
--auto-fork
19+
--skip-issues-with-prs
20+
--attach-logs
21+
--verbose
22+
--no-tool-check
23+
TELEGRAM_SOLVE_OVERRIDES:
24+
--auto-fork
25+
--auto-continue
26+
--attach-logs
27+
--verbose
28+
--no-tool-check
29+
TELEGRAM_BOT_VERBOSE: true
30+
```
31+
32+
### Example with 4 spaces per level:
33+
```
34+
TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'
35+
TELEGRAM_ALLOWED_CHATS:
36+
-1002975819706
37+
-1002861722681
38+
TELEGRAM_HIVE_OVERRIDES:
39+
--all-issues
40+
--once
41+
--auto-fork
42+
--skip-issues-with-prs
43+
--attach-logs
44+
--verbose
45+
--no-tool-check
46+
TELEGRAM_SOLVE_OVERRIDES:
47+
--auto-fork
48+
--auto-continue
49+
--attach-logs
50+
--verbose
51+
--no-tool-check
52+
TELEGRAM_BOT_VERBOSE: true
53+
```
54+
55+
## Expected Behavior
56+
57+
The parser should only care about:
58+
1. **Relative indentation** - what matters is whether a line is indented more or less than its parent
59+
2. **Consistency** - all children at the same level should have the same indentation
60+
61+
The parser should NOT care about:
62+
1. **Absolute indentation amount** - whether it's 2 spaces, 4 spaces, 8 spaces, or even tabs
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
use links_notation::parse_lino_to_links;
2+
3+
fn main() {
4+
// Example with 2 spaces
5+
let two_spaces = "parent:\n child1\n child2";
6+
7+
// Example with 4 spaces
8+
let four_spaces = "parent:\n child1\n child2";
9+
10+
println!("=== Two Spaces ===");
11+
match parse_lino_to_links(two_spaces) {
12+
Ok(links) => {
13+
println!("Parsed {} links:", links.len());
14+
for (i, link) in links.iter().enumerate() {
15+
println!(" Link {}: {}", i, link);
16+
}
17+
}
18+
Err(e) => println!("Error: {}", e),
19+
}
20+
21+
println!("\n=== Four Spaces ===");
22+
match parse_lino_to_links(four_spaces) {
23+
Ok(links) => {
24+
println!("Parsed {} links:", links.len());
25+
for (i, link) in links.iter().enumerate() {
26+
println!(" Link {}: {}", i, link);
27+
}
28+
}
29+
Err(e) => println!("Error: {}", e),
30+
}
31+
32+
// Test the issue example
33+
println!("\n=== Issue Example (leading 2 spaces) ===");
34+
let issue_two_spaces = " TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'
35+
TELEGRAM_ALLOWED_CHATS:
36+
-1002975819706
37+
-1002861722681";
38+
39+
match parse_lino_to_links(issue_two_spaces) {
40+
Ok(links) => {
41+
println!("Parsed {} links:", links.len());
42+
for (i, link) in links.iter().enumerate() {
43+
println!(" Link {}: {}", i, link);
44+
}
45+
}
46+
Err(e) => println!("Error: {}", e),
47+
}
48+
49+
println!("\n=== Issue Example (no leading spaces) ===");
50+
let issue_no_leading = "TELEGRAM_BOT_TOKEN: '849...355:AAG...rgk_YZk...aPU'
51+
TELEGRAM_ALLOWED_CHATS:
52+
-1002975819706
53+
-1002861722681";
54+
55+
match parse_lino_to_links(issue_no_leading) {
56+
Ok(links) => {
57+
println!("Parsed {} links:", links.len());
58+
for (i, link) in links.iter().enumerate() {
59+
println!(" Link {}: {}", i, link);
60+
}
61+
}
62+
Err(e) => println!("Error: {}", e),
63+
}
64+
}

0 commit comments

Comments
 (0)