Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 40 additions & 8 deletions src/interchange/yaml.zig
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,7 @@ pub fn Parser(comptime enc: Encoding) type {
try self.scan(.{});
}

if (self.token.line == document_end_line) {
if (self.token.data != .eof and self.token.line == document_end_line) {
return unexpectedToken();
}
},
Expand Down Expand Up @@ -2282,16 +2282,25 @@ pub fn Parser(comptime enc: Encoding) type {
.line_indent = self.line_indent,
};

// Track whether we're at the start of a new line.
// Document markers (--- and ...) are only valid at line start.
var nl = self.pos == .zero;
if (!nl) {
const prev = self.input[self.pos.sub(1).cast()];
nl = prev == '\n' or prev == '\r';
}

next: switch (self.next()) {
0 => {
return ctx.done();
},

'-' => {
if (self.line_indent == .none and self.remainStartsWith("---") and self.isAnyOrEofAt(" \t\n\r", 3)) {
if (nl and self.line_indent == .none and self.remainStartsWith("---") and self.isAnyOrEofAt(" \t\n\r", 3)) {
return ctx.done();
}

nl = false;
if (!ctx.resolved and ctx.str_builder.len() == 0) {
try ctx.appendSource('-', self.pos);
self.inc(1);
Expand All @@ -2305,10 +2314,11 @@ pub fn Parser(comptime enc: Encoding) type {
},

'.' => {
if (self.line_indent == .none and self.remainStartsWith("...") and self.isAnyOrEofAt(" \t\n\r", 3)) {
if (nl and self.line_indent == .none and self.remainStartsWith("...") and self.isAnyOrEofAt(" \t\n\r", 3)) {
return ctx.done();
}

nl = false;
if (!ctx.resolved and ctx.str_builder.len() == 0) {
switch (self.peek(1)) {
'n',
Expand All @@ -2335,6 +2345,7 @@ pub fn Parser(comptime enc: Encoding) type {
},

':' => {
nl = false;
if (self.isSWhiteOrBCharOrEofAt(1)) {
return ctx.done();
}
Expand Down Expand Up @@ -2365,6 +2376,7 @@ pub fn Parser(comptime enc: Encoding) type {
},

'#' => {
nl = false;
const prev = self.input[self.pos.sub(1).cast()];
if (self.pos == .zero or switch (prev) {
' ',
Expand All @@ -2388,6 +2400,7 @@ pub fn Parser(comptime enc: Encoding) type {
'{',
'}',
=> |c| {
nl = false;
switch (self.context.get()) {
.block_in,
.block_out,
Expand All @@ -2408,6 +2421,7 @@ pub fn Parser(comptime enc: Encoding) type {
' ',
'\t',
=> |c| {
nl = false;
try ctx.appendSourceWhitespace(c, self.pos);
self.inc(1);
continue :next self.next();
Expand Down Expand Up @@ -2448,10 +2462,12 @@ pub fn Parser(comptime enc: Encoding) type {

try ctx.appendWhitespaceNTimes('\n', lines);

nl = true;
continue :next self.next();
},

else => |c| {
nl = false;
if (ctx.resolved or ctx.str_builder.len() != 0) {
const start = self.pos;
self.inc(1);
Expand Down Expand Up @@ -2921,7 +2937,11 @@ pub fn Parser(comptime enc: Encoding) type {
},

'-' => {
if (self.line_indent == .none and self.remainStartsWith("---") and self.isAnyOrEofAt(" \t\n\r", 3)) {
const line_start = self.pos == .zero or switch (self.input[self.pos.sub(1).cast()]) {
'\n', '\r' => true,
else => false,
};
if (line_start and self.line_indent == .none and self.remainStartsWith("---") and self.isAnyOrEofAt(" \t\n\r", 3)) {
return ctx.done(false);
}

Expand All @@ -2940,7 +2960,11 @@ pub fn Parser(comptime enc: Encoding) type {
},

'.' => {
if (self.line_indent == .none and self.remainStartsWith("...") and self.isAnyOrEofAt(" \t\n\r", 3)) {
const line_start = self.pos == .zero or switch (self.input[self.pos.sub(1).cast()]) {
'\n', '\r' => true,
else => false,
};
if (line_start and self.line_indent == .none and self.remainStartsWith("...") and self.isAnyOrEofAt(" \t\n\r", 3)) {
return ctx.done(false);
}

Expand Down Expand Up @@ -3644,7 +3668,11 @@ pub fn Parser(comptime enc: Encoding) type {
'-' => {
const start = self.pos;

if (self.line_indent == .none and self.remainStartsWith(enc.literal("---")) and self.isSWhiteOrBCharOrEofAt(3)) {
const line_start = self.pos == .zero or switch (self.input[self.pos.sub(1).cast()]) {
'\n', '\r' => true,
else => false,
};
if (line_start and self.line_indent == .none and self.remainStartsWith(enc.literal("---")) and self.isSWhiteOrBCharOrEofAt(3)) {
self.inc(3);
break :next .documentStart(.{
.start = start,
Expand Down Expand Up @@ -3724,7 +3752,11 @@ pub fn Parser(comptime enc: Encoding) type {
'.' => {
const start = self.pos;

if (self.line_indent == .none and self.remainStartsWith(enc.literal("...")) and self.isSWhiteOrBCharOrEofAt(3)) {
const line_start = self.pos == .zero or switch (self.input[self.pos.sub(1).cast()]) {
'\n', '\r' => true,
else => false,
};
if (line_start and self.line_indent == .none and self.remainStartsWith(enc.literal("...")) and self.isSWhiteOrBCharOrEofAt(3)) {
self.inc(3);
break :next .documentEnd(.{
.start = start,
Expand Down Expand Up @@ -4276,7 +4308,7 @@ pub fn Parser(comptime enc: Encoding) type {
if (pos.isLessThan(self.input.len)) {
return std.mem.indexOfScalar(enc.unit(), values, self.input[pos.cast()]) != null;
}
return false;
return true;
}

fn isEof(self: *const @This()) bool {
Expand Down
106 changes: 106 additions & 0 deletions test/regression/issue/25660.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/**
* Regression test for issue #25660
* YAML.parse() incorrectly splits on `---` inside values
*
* @see https://github.com/oven-sh/bun/issues/25660
*/
import { describe, expect, test } from "bun:test";

// Regression suite for document-marker detection in Bun.YAML.parse.
//
// `---` (document start) and `...` (document end) are only markers when they
// begin at column 0 and are followed by whitespace, a line break, or the end
// of input. Anywhere else (inside a scalar, after `key: `, or on an indented
// continuation line) they are ordinary plain-scalar content.
//
// NOTE: leading whitespace inside the template literals below is significant
// YAML indentation; do not let a formatter re-indent the fixture text.
describe("YAML.parse document separator handling", () => {
  test("should not split on --- inside scalar values", () => {
    const text = `
name: some-text---
description: Lorem ipsum dolor sit amet, consectetur adipiscing elit.
`;
    const parsed = Bun.YAML.parse(text);
    expect(parsed).toEqual({
      name: "some-text---",
      description: "Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
    });
  });

  test("should not split on --- in middle of value", () => {
    const text = `key: hello---world`;
    const parsed = Bun.YAML.parse(text);
    expect(parsed).toEqual({ key: "hello---world" });
  });

  test("should not split on ... inside scalar values", () => {
    const text = `key: hello...world`;
    const parsed = Bun.YAML.parse(text);
    expect(parsed).toEqual({ key: "hello...world" });
  });

  // The markers are preceded by `key: ` on the same line, so they are not at
  // line start and must be read as the plain-scalar values "---" and "...".
  test("should treat --- and ... after ':' as plain scalar values, not document markers", () => {
    const text = `
key1: ---
key2: ...
`;
    const parsed = Bun.YAML.parse(text);
    expect(parsed).toEqual({ key1: "---", key2: "..." });
  });

  // The continuation lines are indented: a `---` that follows indentation is
  // not at column 0 and therefore stays part of the folded plain scalar.
  // (At column 0 the same text would be a genuine document-start marker.)
  test("should not treat indented --- at the start of a continuation line in a multiline plain scalar as a document separator", () => {
    const text = `
message: first line
  --- still part of the value
  last line
`;
    const parsed = Bun.YAML.parse(text);
    expect(parsed).toEqual({
      message: "first line --- still part of the value last line",
    });
  });

  test("should correctly handle actual document separator at line start", () => {
    const text = `
doc1: value1
---
doc2: value2
`;
    const parsed = Bun.YAML.parse(text);
    // A multi-document stream is returned as an array with one entry per
    // document; assert the whole shape at once instead of indexing into it.
    expect(Array.isArray(parsed)).toBe(true);
    expect(parsed).toEqual([{ doc1: "value1" }, { doc2: "value2" }]);
  });

  test("should handle value ending with multiple dashes", () => {
    const text = `
title: My-Title---
subtitle: Another---Value---
`;
    const parsed = Bun.YAML.parse(text);
    expect(parsed).toEqual({
      title: "My-Title---",
      subtitle: "Another---Value---",
    });
  });

  test("should handle value ending with dots", () => {
    const text = `message: Hello...`;
    const parsed = Bun.YAML.parse(text);
    expect(parsed).toEqual({ message: "Hello..." });
  });

  test("should recognize ... as document end marker for top-level plain scalar", () => {
    // A top-level plain scalar immediately followed by `...` at column 0 and
    // then end of input: the marker must terminate the document rather than
    // being folded into the scalar.
    const text = `hello
...`;
    const parsed = Bun.YAML.parse(text);
    expect(parsed).toBe("hello");
  });

  test("should recognize --- as document separator for top-level plain scalar", () => {
    const text = `first
---
second`;
    const parsed = Bun.YAML.parse(text);
    // Two documents, each a bare top-level scalar.
    expect(Array.isArray(parsed)).toBe(true);
    expect(parsed).toEqual(["first", "second"]);
  });
});