@@ -49,6 +49,7 @@ pub const GraphemePool = struct {
 
     allocator: std.mem.Allocator,
     classes: [MAX_CLASSES]ClassPool,
+    interned_live_ids: std.StringHashMapUnmanaged(IdPayload),
 
     const SlotHeader = extern struct {
         len: u16,
@@ -68,16 +69,81 @@ pub const GraphemePool = struct {
         while (i < MAX_CLASSES) : (i += 1) {
             classes[i] = ClassPool.init(allocator, CLASS_SIZES[i], slots_per_page[i]);
         }
-        return .{ .allocator = allocator, .classes = classes };
+        return .{ .allocator = allocator, .classes = classes, .interned_live_ids = .{} };
     }
 
     pub fn deinit(self: *GraphemePool) void {
+        // Free the duplicated keys before tearing down the map itself.
+        var key_it = self.interned_live_ids.keyIterator();
+        while (key_it.next()) |key_ptr| {
+            self.allocator.free(@constCast(key_ptr.*));
+        }
+        self.interned_live_ids.deinit(self.allocator);
+
         var i: usize = 0;
         while (i < MAX_CLASSES) : (i += 1) {
             self.classes[i].deinit();
         }
     }
 
+    /// removeInternedLiveId removes an interned ID from the live set if it
+    /// matches the expected ID.
+    fn removeInternedLiveId(self: *GraphemePool, bytes: []const u8, expected_id: IdPayload) void {
+        const live_id = self.interned_live_ids.get(bytes) orelse return;
+        if (live_id != expected_id) return;
+        if (self.interned_live_ids.fetchRemove(bytes)) |removed| {
+            self.allocator.free(@constCast(removed.key));
+        }
+    }
+
+    /// lookupOrInvalidate checks whether the given bytes are already interned
+    /// and live, returning the existing ID if so.
+    fn lookupOrInvalidate(self: *GraphemePool, bytes: []const u8) ?IdPayload {
+        const live_id = self.interned_live_ids.get(bytes) orelse return null;
+
+        // Verify that the live ID is still valid and matches the bytes. If get
+        // fails, the ID is no longer valid, so remove it from the interned map.
+        const live_bytes = self.get(live_id) catch {
+            self.removeInternedLiveId(bytes, live_id);
+            return null;
+        };
+
+        // If the bytes don't match, the ID was recycled and now points to
+        // different data. Invalidate the interned ID.
+        if (!std.mem.eql(u8, live_bytes, bytes)) {
+            self.removeInternedLiveId(bytes, live_id);
+            return null;
+        }
+
+        // Check that the refcount is > 0 to ensure the ID is still live. If the
+        // refcount is 0, the slot is free but hasn't been reused yet, so treat
+        // the entry as not found.
+        const live_refcount = self.getRefcount(live_id) catch {
+            self.removeInternedLiveId(bytes, live_id);
+            return null;
+        };
+        if (live_refcount == 0) {
+            self.removeInternedLiveId(bytes, live_id);
+            return null;
+        }
+
+        return live_id;
+    }
+
+    /// internLiveId records the given bytes as the canonical live entry for
+    /// this ID, duplicating them so the map owns its keys.
+    fn internLiveId(self: *GraphemePool, id: IdPayload, bytes: []const u8) GraphemePoolError!void {
+        if (self.lookupOrInvalidate(bytes) != null) {
+            // Keep the existing interned ID if it's still valid.
+            return;
+        }
+
+        const owned_key = self.allocator.dupe(u8, bytes) catch return GraphemePoolError.OutOfMemory;
+        errdefer self.allocator.free(owned_key);
+
+        if (self.interned_live_ids.fetchPut(self.allocator, owned_key, id) catch return GraphemePoolError.OutOfMemory) |replaced| {
+            // A previous key allocation was replaced; free it.
+            self.allocator.free(@constCast(replaced.key));
+        }
+    }
+
     fn classForSize(size: usize) u32 {
         if (size <= 8) return 0;
         if (size <= 16) return 1;
@@ -94,6 +160,10 @@ pub const GraphemePool = struct {
     }
 
     pub fn alloc(self: *GraphemePool, bytes: []const u8) GraphemePoolError!IdPayload {
+        // Reuse the existing live slot for identical bytes instead of
+        // allocating a duplicate.
+        if (self.lookupOrInvalidate(bytes)) |live_id| {
+            return live_id;
+        }
+
         const class_id: u32 = classForSize(bytes.len);
         const slot_index = try self.classes[class_id].allocInternal(bytes, true);
         const generation = self.classes[class_id].getGeneration(slot_index);
@@ -116,14 +186,35 @@ pub const GraphemePool = struct {
         if (class_id >= MAX_CLASSES) return GraphemePoolError.InvalidId;
         const slot_index: u32 = id & SLOT_MASK;
         const generation: u32 = (id >> SLOT_BITS) & GENERATION_MASK;
+        const old_refcount = try self.classes[class_id].getRefcount(slot_index, generation);
         try self.classes[class_id].incref(slot_index, generation);
+
+        if (old_refcount == 0) {
+            const is_owned = try self.classes[class_id].isOwned(slot_index, generation);
+            if (is_owned) {
+                // This is a transition from 0 to 1 for owned bytes, so intern it.
+                const bytes = try self.classes[class_id].get(slot_index, generation);
+                try self.internLiveId(id, bytes);
+            }
+        }
     }
 
     pub fn decref(self: *GraphemePool, id: IdPayload) GraphemePoolError!void {
         const class_id: u32 = (id >> (GENERATION_BITS + SLOT_BITS)) & CLASS_MASK;
         if (class_id >= MAX_CLASSES) return GraphemePoolError.InvalidId;
         const slot_index: u32 = id & SLOT_MASK;
         const generation: u32 = (id >> SLOT_BITS) & GENERATION_MASK;
+
+        const old_refcount = try self.classes[class_id].getRefcount(slot_index, generation);
+        if (old_refcount == 1) {
+            const is_owned = try self.classes[class_id].isOwned(slot_index, generation);
+            if (is_owned) {
+                // This is a transition from 1 to 0 for owned bytes, so remove
+                // the map entry.
+                const bytes = try self.classes[class_id].get(slot_index, generation);
+                self.removeInternedLiveId(bytes, id);
+            }
+        }
+
         try self.classes[class_id].decref(slot_index, generation);
     }
129220
@@ -135,6 +226,13 @@ pub const GraphemePool = struct {
         if (class_id >= MAX_CLASSES) return GraphemePoolError.InvalidId;
         const slot_index: u32 = id & SLOT_MASK;
         const generation: u32 = (id >> SLOT_BITS) & GENERATION_MASK;
+
+        // Drop any interned entry still pointing at this slot before freeing it.
+        const is_owned = try self.classes[class_id].isOwned(slot_index, generation);
+        if (is_owned) {
+            const bytes = try self.classes[class_id].get(slot_index, generation);
+            self.removeInternedLiveId(bytes, id);
+        }
+
         try self.classes[class_id].freeUnreferenced(slot_index, generation);
     }
 
@@ -313,6 +411,14 @@ pub const GraphemePool = struct {
             if (header_ptr.generation != expected_generation) return GraphemePoolError.WrongGeneration;
             return header_ptr.refcount;
         }
+
+        pub fn isOwned(self: *ClassPool, slot_index: u32, expected_generation: u32) GraphemePoolError!bool {
+            if (slot_index >= self.num_slots) return GraphemePoolError.InvalidId;
+            const p = self.slotPtr(slot_index);
+            const header_ptr = @as(*SlotHeader, @ptrCast(@alignCast(p)));
+            if (header_ptr.generation != expected_generation) return GraphemePoolError.WrongGeneration;
+            return header_ptr.is_owned == 1;
+        }
     };
 };
 
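The intern map only tracks owned bytes whose refcount is above zero, so deduplication takes effect once an ID has been incref'd at least once and lapses again on the last decref. The sketch below illustrates that lifecycle from the caller's side. It is not part of the commit: it assumes GraphemePool.init is infallible (as the diff suggests), that a freshly alloc'd slot starts at refcount 0 so alloc itself does not intern, and the test name and cluster value are made up for illustration.

const std = @import("std");

test "alloc reuses the interned ID for live duplicate bytes" {
    var pool = GraphemePool.init(std.testing.allocator);
    defer pool.deinit();

    // "e" + U+0301 (combining acute accent): one grapheme, three bytes.
    const cluster = "e\u{0301}";

    const id_a = try pool.alloc(cluster);
    try pool.incref(id_a); // 0 -> 1: the owned bytes get interned.

    // The same bytes now resolve to the existing live ID, not a new slot.
    const id_b = try pool.alloc(cluster);
    try std.testing.expectEqual(id_a, id_b);

    // 1 -> 0: the map entry is removed; allocating the same bytes later
    // would carve out a fresh slot.
    try pool.decref(id_a);
}

The recycling guards in lookupOrInvalidate are what make the map safe to consult lazily: a stale entry whose slot was reused for different bytes, or whose generation rolled over, fails the get/eql/refcount checks and is invalidated on first touch rather than aliasing live data.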