1,025 changes: 1,014 additions & 11 deletions src/base/Ident.zig

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/base/Scratch.zig
@@ -2,7 +2,7 @@
//! when working with recursive operations

const std = @import("std");
const base = @import("../base.zig");
const base = @import("mod.zig");

/// A stack for easily adding and removing index types when doing recursive operations
pub fn Scratch(comptime T: type) type {
71 changes: 41 additions & 30 deletions src/base/test/base_test.zig
@@ -233,12 +233,12 @@ test "Ident.Store basic CompactWriter roundtrip" {
const idx3 = try original.insert(gpa, ident3);

// Verify the attributes in the indices
try std.testing.expect(!idx1.attributes.effectful);
try std.testing.expect(!idx1.attributes.ignored);
try std.testing.expect(idx2.attributes.effectful);
try std.testing.expect(!idx2.attributes.ignored);
try std.testing.expect(!idx3.attributes.effectful);
try std.testing.expect(idx3.attributes.ignored);
try std.testing.expect(!idx1.attributes().effectful);
try std.testing.expect(!idx1.attributes().ignored);
try std.testing.expect(idx2.attributes().effectful);
try std.testing.expect(!idx2.attributes().ignored);
try std.testing.expect(!idx3.attributes().effectful);
try std.testing.expect(idx3.attributes().ignored);

// Create a temp file
var tmp_dir = std.testing.tmpDir(.{});
@@ -284,11 +284,15 @@ test "Ident.Store basic CompactWriter roundtrip" {

// Check the bytes length for validation
const bytes_len = deserialized.interner.bytes.len();
const idx1_value = @intFromEnum(@as(SmallStringInterner.Idx, @enumFromInt(@as(u32, idx1.idx))));

// Verify the index is valid
if (bytes_len <= idx1_value) {
return error.InvalidIndex;
// Get the interner index if it exists (skip validation for small indices)
if (idx1.getInternerIdx()) |interner_idx| {
const idx1_value = @intFromEnum(@as(SmallStringInterner.Idx, @enumFromInt(interner_idx)));

// Verify the index is valid
if (bytes_len <= idx1_value) {
return error.InvalidIndex;
}
}

// Verify the identifiers are accessible
@@ -454,30 +458,32 @@ test "Ident.Store comprehensive CompactWriter roundtrip" {
defer original.deinit(gpa);

// Test various identifier types and edge cases
// Note: SmallStringInterner starts with a 0 byte at index 0, so strings start at index 1
const test_idents = [_]struct { text: []const u8, expected_idx: u32 }{
.{ .text = "hello", .expected_idx = 1 },
.{ .text = "world!", .expected_idx = 7 },
.{ .text = "_ignored", .expected_idx = 14 },
.{ .text = "a", .expected_idx = 23 }, // single character
.{ .text = "very_long_identifier_name_that_might_cause_issues", .expected_idx = 25 },
.{ .text = "effectful!", .expected_idx = 75 },
.{ .text = "_", .expected_idx = 86 }, // Just underscore
.{ .text = "CamelCase", .expected_idx = 88 },
.{ .text = "snake_case", .expected_idx = 98 },
.{ .text = "SCREAMING_CASE", .expected_idx = 109 },
.{ .text = "hello", .expected_idx = 1 }, // duplicate, should reuse
// Don't check specific indices as they depend on implementation details
// Just verify that identifiers round-trip correctly
const test_idents = [_][]const u8{
"hello", // 5 chars, goes to interner
"world!", // has !, goes to interner
"_ignored", // starts with _, goes to interner
"a", // single character - may be inlined
"very_long_identifier_name_that_might_cause_issues",
"effectful!",
"_", // Just underscore
"CamelCase",
"snake_case",
"SCREAMING_CASE",
"hello", // duplicate, should reuse
};

var indices = std.ArrayList(Ident.Idx).init(gpa);
defer indices.deinit();

for (test_idents) |test_ident| {
const ident = Ident.for_text(test_ident.text);
for (test_idents) |text| {
const ident = Ident.for_text(text);
const idx = try original.insert(gpa, ident);
try indices.append(idx);
// Verify the index matches expectation
try std.testing.expectEqual(test_ident.expected_idx, idx.idx);
// Just verify we can retrieve the text correctly
const retrieved = original.getText(idx);
try std.testing.expectEqualStrings(text, retrieved);
}

// Add some unique names
@@ -530,10 +536,10 @@ test "Ident.Store comprehensive CompactWriter roundtrip" {
deserialized.relocate(@as(isize, @intCast(@intFromPtr(buffer.ptr))));

// Verify all identifiers (skip duplicate at end)
for (test_idents[0..10], 0..) |test_ident, i| {
for (test_idents[0..10], 0..) |expected_text, i| {
const idx = indices.items[i];
const text = deserialized.getText(idx);
try std.testing.expectEqualStrings(test_ident.text, text);
try std.testing.expectEqualStrings(expected_text, text);
}

// Verify unique names
@@ -542,7 +548,8 @@

// Verify the interner's entry count is preserved
// We inserted 10 unique strings + 2 generated unique names = 12 total
try std.testing.expectEqual(@as(u32, 12), deserialized.interner.entry_count);
// With SmallIdx, "a" is inlined so we have one less entry in the interner
try std.testing.expectEqual(@as(u32, 11), deserialized.interner.entry_count);

// Verify next_unique_name
try std.testing.expectEqual(@as(u32, 2), deserialized.next_unique_name);
@@ -1695,3 +1702,7 @@ test "TargetUsize conversion to usize" {
try std.testing.expectEqual(TargetUsize.u32.size(), 4);
try std.testing.expectEqual(TargetUsize.u64.size(), 8);
}

test {
_ = @import("ident_corruption_test.zig");
}
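
Taken together, these test updates suggest that Ident.Idx now stores short identifier text inline in the index itself, exposes its attributes through an attributes() method rather than a field, and reports no interner index for inlined names. A minimal sketch under those assumptions (the exact inlining cutoff is not visible in this excerpt, since the Ident.zig diff above is not rendered):

const std = @import("std");
const base = @import("base");
const Ident = base.Ident;

test "inlined identifiers skip the interner (sketch)" {
    const gpa = std.testing.allocator;

    var store = try Ident.Store.initCapacity(gpa, 8);
    defer store.deinit(gpa);

    // "a" is assumed to be inlined here, per the entry-count comment in the
    // comprehensive roundtrip test above.
    const idx = try store.insert(gpa, Ident.for_text("a"));

    // Attributes are read through a method now, not a field.
    try std.testing.expect(!idx.attributes().ignored);
    try std.testing.expect(!idx.attributes().effectful);

    // Inlined identifiers have no backing interner entry, but getText still
    // returns the original text.
    if (idx.getInternerIdx() == null) {
        try std.testing.expectEqualStrings("a", store.getText(idx));
    }
}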
83 changes: 83 additions & 0 deletions src/base/test/ident_corruption_test.zig
@@ -0,0 +1,83 @@
const std = @import("std");
const base = @import("base");
const Ident = base.Ident;

// Test 1: nominal_import_wildcard.md regression
// The diff shows "red" corrupted to "aed"
test "red identifier should not be corrupted" {
const testing = std.testing;

const idx_opt = Ident.Idx.try_inline("red");
try testing.expect(idx_opt != null);

const idx = idx_opt.?;
const inner = idx.toInner();
try testing.expect(inner.is_small);

// Create a copy to avoid alignment issues with packed struct
const small_copy = inner.data.small;
var buffer: [7]u8 = undefined;
const result = small_copy.writeTextToBuffer(&buffer);

// This should be "red", not "aed"
try testing.expectEqualStrings("red", result);
}

// Test 2: underscore_in_regular_annotations.md regression
// The diff shows "_field2" corrupted to "_field"
test "_field2 identifier should not be corrupted" {
const testing = std.testing;
const gpa = testing.allocator;

var store = try Ident.Store.initCapacity(gpa, 10);
defer store.deinit(gpa);

const ident = Ident.for_text("_field2");
const idx = try store.insert(gpa, ident);
const result = store.getText(idx);

// This should be "_field2", not "_field"
try testing.expectEqualStrings("_field2", result);
}

// Test 3: plume_package/Color.md regression
// The diff shows "U8" corrupted to "a8"
test "U8 identifier should not be corrupted" {
const testing = std.testing;

const idx_opt = Ident.Idx.try_inline("U8");
try testing.expect(idx_opt != null);

const idx = idx_opt.?;
const inner = idx.toInner();
try testing.expect(inner.is_small);

// Create a copy to avoid alignment issues with packed struct
const small_copy = inner.data.small;
var buffer: [7]u8 = undefined;
const result = small_copy.writeTextToBuffer(&buffer);

// This should be "U8", not "a8"
try testing.expectEqualStrings("U8", result);
}

// Test 4: test_exact_pattern_crash.md regression
// The diff shows "_e" corrupted to "_a"
test "_e identifier should not be corrupted" {
const testing = std.testing;

const idx_opt = Ident.Idx.try_inline("_e");
try testing.expect(idx_opt != null);

const idx = idx_opt.?;
const inner = idx.toInner();
try testing.expect(inner.is_small);

// Create a copy to avoid alignment issues with packed struct
const small_copy = inner.data.small;
var buffer: [7]u8 = undefined;
const result = small_copy.writeTextToBuffer(&buffer);

// This should be "_e", not "_a"
try testing.expectEqualStrings("_e", result);
}
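
For quick reference, a usage sketch of the inline path these regression tests exercise. The fallback behavior is an assumption: since the comprehensive test above notes that "hello" (5 chars) still goes to the interner, anything that long or longer is presumed to make try_inline return null.

const std = @import("std");
const base = @import("base");
const Ident = base.Ident;

test "try_inline round-trip and fallback (sketch)" {
    // Assumed: a name this long is never inlined.
    try std.testing.expect(Ident.Idx.try_inline("definitely_not_short") == null);

    // Short names like the ones above ("red", "U8", "_e") are inlined.
    const idx = Ident.Idx.try_inline("ok") orelse return error.SkipZigTest;
    const inner = idx.toInner();
    try std.testing.expect(inner.is_small);

    // Copy out of the packed struct before taking a pointer, as in the tests above.
    const small_copy = inner.data.small;
    var buffer: [7]u8 = undefined;
    try std.testing.expectEqualStrings("ok", small_copy.writeTextToBuffer(&buffer));
}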
11 changes: 3 additions & 8 deletions src/cache/CacheManager.zig
@@ -9,7 +9,6 @@ const Cache = cache_mod.CacheModule;
const CacheConfig = cache_mod.CacheConfig;
const CacheStats = cache_mod.CacheStats;
const CacheReporting = @import("CacheReporting.zig");
const SERIALIZATION_ALIGNMENT = 16;
const coordinate_simple = @import("../coordinate_simple.zig");

const Allocator = std.mem.Allocator;
@@ -115,7 +114,7 @@ pub const CacheManager = struct {
///
/// Serializes the ProcessResult and stores it in the cache using BLAKE3-based
/// filenames with subdirectory splitting.
pub fn store(self: *Self, cache_key: [32]u8, process_result: *const coordinate_simple.ProcessResult) !void {
pub fn store(self: *Self, cache_key: [32]u8, process_result: *const coordinate_simple.ProcessResult, arena_allocator: Allocator) !void {
if (!self.config.enabled) {
return;
}
@@ -129,11 +128,7 @@
return;
};

// Create arena for serialization
var arena = std.heap.ArenaAllocator.init(self.allocator);
defer arena.deinit();

const cache_data = Cache.create(self.allocator, arena.allocator(), process_result.cir, process_result.cir, process_result.error_count, process_result.warning_count) catch |err| {
const cache_data = Cache.create(self.allocator, arena_allocator, process_result.cir, process_result.cir, process_result.error_count, process_result.warning_count) catch |err| {
if (self.config.verbose) {
std.log.debug("Failed to serialize cache data: {}", .{err});
}
@@ -249,7 +244,7 @@ pub const CacheManager = struct {
/// The caller must not free it after calling this function.
fn restoreFromCache(
self: *Self,
cache_data: []align(SERIALIZATION_ALIGNMENT) const u8,
cache_data: []align(@alignOf(ModuleEnv)) const u8,
source: []const u8,
) !struct {
result: coordinate_simple.ProcessResult,
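
With the arena removed from store() itself, callers now own the serialization arena and pass its allocator in. A sketch of the assumed call pattern (the surrounding function and variable names are illustrative; the real call site is updated elsewhere in this PR):

const std = @import("std");

fn storeWithCallerArena(
    gpa: std.mem.Allocator,
    cache_manager: anytype,
    cache_key: [32]u8,
    process_result: anytype,
) !void {
    // The caller creates the arena that CacheManager.store used to create internally...
    var arena = std.heap.ArenaAllocator.init(gpa);
    defer arena.deinit();

    // ...and all serialization scratch memory is freed in one shot on deinit.
    try cache_manager.store(cache_key, process_result, arena.allocator());
}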
39 changes: 14 additions & 25 deletions src/cache/CacheModule.zig
@@ -14,8 +14,6 @@ const serialization = @import("serialization");
const SExprTree = base.SExprTree;
const Filesystem = @import("../fs/Filesystem.zig");

const SERIALIZATION_ALIGNMENT = 16;

const Allocator = std.mem.Allocator;
const TypeStore = types.Store;
const ModuleEnv = compile.ModuleEnv;
@@ -43,9 +41,6 @@ pub const Header = struct {
error_count: u32,
warning_count: u32,

/// Padding to ensure alignment
_padding: [12]u8 = [_]u8{0} ** 12,

/// Error specific to initializing a Header from bytes
pub const InitError = error{
PartialRead,
@@ -79,7 +74,7 @@
/// Memory-mapped cache that can be read directly from disk
pub const CacheModule = struct {
header: *const Header,
data: []align(SERIALIZATION_ALIGNMENT) const u8,
data: []align(@alignOf(ModuleEnv)) const u8,

/// Create a cache by serializing ModuleEnv and CIR data.
/// The provided allocator is used for the returned cache data, while
@@ -91,7 +86,7 @@
_: *const ModuleEnv, // ModuleEnv contains the canonical IR
error_count: u32,
warning_count: u32,
) ![]align(SERIALIZATION_ALIGNMENT) u8 {
) ![]align(@alignOf(ModuleEnv)) u8 {
const CompactWriter = serialization.CompactWriter;

// Create CompactWriter
@@ -111,9 +106,9 @@
const total_data_size = writer.total_bytes;

// Allocate cache_data for header + data
const header_size = std.mem.alignForward(usize, @sizeOf(Header), SERIALIZATION_ALIGNMENT);
const header_size = std.mem.alignForward(usize, @sizeOf(Header), @alignOf(ModuleEnv));
const total_size = header_size + total_data_size;
const cache_data = try allocator.alignedAlloc(u8, SERIALIZATION_ALIGNMENT, total_size);
const cache_data = try allocator.alignedAlloc(u8, @alignOf(ModuleEnv), total_size);
errdefer allocator.free(cache_data);

// Initialize header
@@ -124,7 +119,6 @@
.data_size = @intCast(total_data_size),
.error_count = error_count,
.warning_count = warning_count,
._padding = [_]u8{0} ** 12,
};

// Consolidate the scattered iovecs into the cache data buffer
@@ -140,7 +134,7 @@
}

/// Load a cache from memory-mapped data
pub fn fromMappedMemory(mapped_data: []align(SERIALIZATION_ALIGNMENT) const u8) !CacheModule {
pub fn fromMappedMemory(mapped_data: []align(@alignOf(ModuleEnv)) const u8) !CacheModule {
if (mapped_data.len < @sizeOf(Header)) {
return error.BufferTooSmall;
}
@@ -156,12 +150,12 @@
if (mapped_data.len < expected_total_size) return error.BufferTooSmall;

// Get data section (must be aligned)
const header_size = std.mem.alignForward(usize, @sizeOf(Header), SERIALIZATION_ALIGNMENT);
const header_size = std.mem.alignForward(usize, @sizeOf(Header), @alignOf(ModuleEnv));
const data = mapped_data[header_size .. header_size + header.data_size];

return CacheModule{
.header = header,
.data = @as([]align(SERIALIZATION_ALIGNMENT) const u8, @alignCast(data)),
.data = @as([]align(@alignOf(ModuleEnv)) const u8, @alignCast(data)),
};
}

@@ -222,11 +216,11 @@
allocator: Allocator,
file_path: []const u8,
filesystem: anytype,
) ![]align(SERIALIZATION_ALIGNMENT) u8 {
) ![]align(@alignOf(ModuleEnv)) u8 {
const file_data = try filesystem.readFile(file_path, allocator);
defer allocator.free(file_data);

const buffer = try allocator.alignedAlloc(u8, SERIALIZATION_ALIGNMENT, file_data.len);
const buffer = try allocator.alignedAlloc(u8, @alignOf(ModuleEnv), file_data.len);
@memcpy(buffer, file_data);

return buffer;
@@ -235,14 +229,14 @@
/// Tagged union to represent cache data that can be either memory-mapped or heap-allocated
pub const CacheData = union(enum) {
mapped: struct {
ptr: [*]align(SERIALIZATION_ALIGNMENT) const u8,
ptr: [*]align(@alignOf(ModuleEnv)) const u8,
len: usize,
unaligned_ptr: [*]const u8,
unaligned_len: usize,
},
allocated: []align(SERIALIZATION_ALIGNMENT) const u8,
allocated: []align(@alignOf(ModuleEnv)) const u8,

pub fn data(self: CacheData) []align(SERIALIZATION_ALIGNMENT) const u8 {
pub fn data(self: CacheData) []align(@alignOf(ModuleEnv)) const u8 {
return switch (self) {
.mapped => |m| m.ptr[0..m.len],
.allocated => |a| a,
@@ -324,7 +318,7 @@
// Find the aligned portion within the mapped memory
const unaligned_ptr = @as([*]const u8, @ptrCast(result.ptr));
const addr = @intFromPtr(unaligned_ptr);
const aligned_addr = std.mem.alignForward(usize, addr, SERIALIZATION_ALIGNMENT);
const aligned_addr = std.mem.alignForward(usize, addr, @alignOf(ModuleEnv));
const offset = aligned_addr - addr;

if (offset >= file_size_usize) {
@@ -336,7 +330,7 @@
return CacheData{ .allocated = data };
}

const aligned_ptr = @as([*]align(SERIALIZATION_ALIGNMENT) const u8, @ptrFromInt(aligned_addr));
const aligned_ptr = @as([*]align(@alignOf(ModuleEnv)) const u8, @ptrFromInt(aligned_addr));
const aligned_len = file_size_usize - offset;

return CacheData{
@@ -362,11 +356,6 @@
data_size: u32,
};

test "Header alignment" {
// Verify the header is properly aligned
try std.testing.expect(@sizeOf(Header) % SERIALIZATION_ALIGNMENT == 0);
}

test "create and restore cache" {
// This test is skipped for now, but will be replaced by the iovecs/relocation approach anyway.
const skip_test = true;
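
The recurring change in this file is that the serialization alignment is now derived from @alignOf(ModuleEnv) instead of a hard-coded SERIALIZATION_ALIGNMENT of 16, which is also why the Header no longer needs a manual _padding field or the "Header alignment" test. A small sketch of that size calculation, using stand-in types rather than the real Header and ModuleEnv:

const std = @import("std");

// Total cache buffer size: header rounded up to the data section's alignment,
// then the data itself.
fn cacheBufferSize(comptime Header: type, comptime Env: type, data_size: usize) usize {
    const header_size = std.mem.alignForward(usize, @sizeOf(Header), @alignOf(Env));
    return header_size + data_size;
}

test "header is rounded up to the target alignment (sketch)" {
    const FakeHeader = extern struct { a: u32, b: u32 }; // 8 bytes
    const FakeEnv = struct { x: u64 }; // 8-byte alignment
    try std.testing.expectEqual(@as(usize, 8 + 32), cacheBufferSize(FakeHeader, FakeEnv, 32));
}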