Skip to content

Commit 06adfb0

Browse files
authored
Add SweepChunk to native MarkSweepSpace (#1158)
This PR re-introduces the `SweepChunk` work packet to the native `MarkSweepSpace` when using eager sweeping. The main intention of this PR is fixing a pathological case where there is only one mutator and the single `ReleaseMutator` work packet cannot be parallelized. The algorithm is unchanged for lazy sweeping. When doing eager sweeping, 1. We first release all unmarked blocks in `MarkSweepSpace::abandoned` and each mutator. 2. And then we sweep blocks in parallel using `SweepChunk` work packets. 3. We then go through all "consumed" blocks and move them into "available" lists if they have any free cells. In step (1), we release blocks for each mutator in `ReleaseMutator`. Releasing blocks is very fast, so parallelism is not a bottleneck. During step (2), all blocks are either unallocated or marked, so we don't need to move any block out of linked lists, avoiding the data race we observed in #1103. Step (3) is done by one thread, but it is fast enough not to be a performance bottleneck. We also introduced a work packet `ReleaseMarkSweepSpace` which does what `MarkSweepSpace::release` did, but is executed concurrently with `ReleaseMutator` work packets. In this way, we don't do too much work in the `Release` work packet, which is a problem we discussed in #1147. We removed the constant `ABANDON_BLOCKS_IN_RESET` because removing it is obviously necessary. Otherwise one mutator would keep too many blocks locally, preventing other mutators from getting available blocks to use. We added a USDT tracepoint in `SweepChunk` in both `MarkSweepSpace` and `ImmixSpace` so that we can see the number of allocated blocks a `SweepChunk` work packet visited in the timeline generated by eBPF tracing tools. We also changed `immixspace::SweepChunk` so that it now *asserts* that the chunk is allocated rather than skipping unallocated chunks. `ChunkMap::generate_tasks` already filters out unallocated chunks. Fixes: #1146
1 parent 9658573 commit 06adfb0

File tree

7 files changed

+234
-139
lines changed

7 files changed

+234
-139
lines changed

src/plan/marksweep/global.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,6 @@ impl<VM: VMBinding> Plan for MarkSweep<VM> {
6565
self.common.release(tls, true);
6666
}
6767

68-
fn end_of_gc(&mut self, _tls: VMWorkerThread) {
69-
self.ms.end_of_gc();
70-
}
71-
7268
fn collection_required(&self, space_full: bool, _space: Option<SpaceStats<Self::VM>>) -> bool {
7369
self.base().collection_required(self, space_full)
7470
}

src/policy/immix/immixspace.rs

Lines changed: 47 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -890,58 +890,59 @@ struct SweepChunk<VM: VMBinding> {
890890

891891
impl<VM: VMBinding> GCWork<VM> for SweepChunk<VM> {
892892
fn do_work(&mut self, _worker: &mut GCWorker<VM>, mmtk: &'static MMTK<VM>) {
893+
assert_eq!(self.space.chunk_map.get(self.chunk), ChunkState::Allocated);
894+
893895
let mut histogram = self.space.defrag.new_histogram();
894-
if self.space.chunk_map.get(self.chunk) == ChunkState::Allocated {
895-
let line_mark_state = if super::BLOCK_ONLY {
896-
None
897-
} else {
898-
Some(self.space.line_mark_state.load(Ordering::Acquire))
899-
};
900-
// Hints for clearing side forwarding bits.
901-
let is_moving_gc = mmtk.get_plan().current_gc_may_move_object();
902-
let is_defrag_gc = self.space.defrag.in_defrag();
903-
// number of allocated blocks.
904-
let mut allocated_blocks = 0;
905-
// Iterate over all allocated blocks in this chunk.
906-
for block in self
907-
.chunk
908-
.iter_region::<Block>()
909-
.filter(|block| block.get_state() != BlockState::Unallocated)
910-
{
911-
// Clear side forwarding bits.
912-
// In the beginning of the next GC, no side forwarding bits shall be set.
913-
// In this way, we can omit clearing forwarding bits when copying object.
914-
// See `GCWorkerCopyContext::post_copy`.
915-
// Note, `block.sweep()` overwrites `DEFRAG_STATE_TABLE` with the number of holes,
916-
// but we need it to know if a block is a defrag source.
917-
// We clear forwarding bits before `block.sweep()`.
918-
if let MetadataSpec::OnSide(side) = *VM::VMObjectModel::LOCAL_FORWARDING_BITS_SPEC {
919-
if is_moving_gc {
920-
let objects_may_move = if is_defrag_gc {
921-
// If it is a defrag GC, we only clear forwarding bits for defrag sources.
922-
block.is_defrag_source()
923-
} else {
924-
// Otherwise, it must be a nursery GC of StickyImmix with copying nursery.
925-
// We don't have information about which block contains moved objects,
926-
// so we have to clear forwarding bits for all blocks.
927-
true
928-
};
929-
if objects_may_move {
930-
side.bzero_metadata(block.start(), Block::BYTES);
931-
}
896+
let line_mark_state = if super::BLOCK_ONLY {
897+
None
898+
} else {
899+
Some(self.space.line_mark_state.load(Ordering::Acquire))
900+
};
901+
// Hints for clearing side forwarding bits.
902+
let is_moving_gc = mmtk.get_plan().current_gc_may_move_object();
903+
let is_defrag_gc = self.space.defrag.in_defrag();
904+
// number of allocated blocks.
905+
let mut allocated_blocks = 0;
906+
// Iterate over all allocated blocks in this chunk.
907+
for block in self
908+
.chunk
909+
.iter_region::<Block>()
910+
.filter(|block| block.get_state() != BlockState::Unallocated)
911+
{
912+
// Clear side forwarding bits.
913+
// In the beginning of the next GC, no side forwarding bits shall be set.
914+
// In this way, we can omit clearing forwarding bits when copying object.
915+
// See `GCWorkerCopyContext::post_copy`.
916+
// Note, `block.sweep()` overwrites `DEFRAG_STATE_TABLE` with the number of holes,
917+
// but we need it to know if a block is a defrag source.
918+
// We clear forwarding bits before `block.sweep()`.
919+
if let MetadataSpec::OnSide(side) = *VM::VMObjectModel::LOCAL_FORWARDING_BITS_SPEC {
920+
if is_moving_gc {
921+
let objects_may_move = if is_defrag_gc {
922+
// If it is a defrag GC, we only clear forwarding bits for defrag sources.
923+
block.is_defrag_source()
924+
} else {
925+
// Otherwise, it must be a nursery GC of StickyImmix with copying nursery.
926+
// We don't have information about which block contains moved objects,
927+
// so we have to clear forwarding bits for all blocks.
928+
true
929+
};
930+
if objects_may_move {
931+
side.bzero_metadata(block.start(), Block::BYTES);
932932
}
933933
}
934-
935-
if !block.sweep(self.space, &mut histogram, line_mark_state) {
936-
// Block is live. Increment the allocated block count.
937-
allocated_blocks += 1;
938-
}
939934
}
940-
// Set this chunk as free if there is not live blocks.
941-
if allocated_blocks == 0 {
942-
self.space.chunk_map.set(self.chunk, ChunkState::Free)
935+
936+
if !block.sweep(self.space, &mut histogram, line_mark_state) {
937+
// Block is live. Increment the allocated block count.
938+
allocated_blocks += 1;
943939
}
944940
}
941+
probe!(mmtk, sweep_chunk, allocated_blocks);
942+
// Set this chunk as free if there is not live blocks.
943+
if allocated_blocks == 0 {
944+
self.space.chunk_map.set(self.chunk, ChunkState::Free)
945+
}
945946
self.space.defrag.add_completed_mark_histogram(histogram);
946947
self.epilogue.finish_one_work_packet();
947948
}

src/policy/marksweepspace/native_ms/global.rs

Lines changed: 157 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
1-
use std::sync::Arc;
2-
3-
use atomic::Ordering;
1+
use std::sync::{
2+
atomic::{AtomicUsize, Ordering},
3+
Arc,
4+
};
45

56
use crate::{
67
policy::{marksweepspace::native_ms::*, sft::GCWorkerMutRef},
7-
scheduler::{GCWorkScheduler, GCWorker},
8+
scheduler::{GCWorkScheduler, GCWorker, WorkBucketStage},
89
util::{
910
copy::CopySemantics,
1011
heap::{BlockPageResource, PageResource},
1112
metadata::{self, side_metadata::SideMetadataSpec, MetadataSpec},
1213
ObjectReference,
1314
},
14-
vm::VMBinding,
15+
vm::{ActivePlan, VMBinding},
1516
};
1617

1718
#[cfg(feature = "is_mmtk_object")]
@@ -78,6 +79,9 @@ pub struct MarkSweepSpace<VM: VMBinding> {
7879
/// these block lists in the space. These lists are only filled in the release phase,
7980
/// and will be moved to the abandoned lists above at the end of a GC.
8081
abandoned_in_gc: Mutex<AbandonedBlockLists>,
82+
/// Count the number of pending `ReleaseMarkSweepSpace` and `ReleaseMutator` work packets during
83+
/// the `Release` stage.
84+
pending_release_packets: AtomicUsize,
8185
}
8286

8387
unsafe impl<VM: VMBinding> Sync for MarkSweepSpace<VM> {}
@@ -102,26 +106,31 @@ impl AbandonedBlockLists {
102106
// Release free blocks
103107
self.available[i].release_blocks(space);
104108
self.consumed[i].release_blocks(space);
105-
self.unswept[i].release_blocks(space);
109+
if cfg!(not(feature = "eager_sweeping")) {
110+
self.unswept[i].release_blocks(space);
111+
} else {
112+
// If we do eager sweeping, we should have no unswept blocks.
113+
debug_assert!(self.unswept[i].is_empty());
114+
}
115+
116+
// For eager sweeping, that's it. We just release unmarked blocks, and leave marked
117+
// blocks to be swept later in the `SweepChunk` work packet.
106118

107-
// Move remaining blocks to unswept
108-
self.unswept[i].append(&mut self.available[i]);
109-
self.unswept[i].append(&mut self.consumed[i]);
119+
// For lazy sweeping, we move blocks from available and consumed to unswept. When an
120+
// allocator tries to use them, they will sweep the block.
121+
if cfg!(not(feature = "eager_sweeping")) {
122+
self.unswept[i].append(&mut self.available[i]);
123+
self.unswept[i].append(&mut self.consumed[i]);
124+
}
110125
}
111126
}
112127

113-
fn sweep<VM: VMBinding>(&mut self, space: &MarkSweepSpace<VM>) {
128+
fn recycle_blocks(&mut self) {
114129
for i in 0..MI_BIN_FULL {
115-
self.available[i].release_and_sweep_blocks(space);
116-
self.consumed[i].release_and_sweep_blocks(space);
117-
self.unswept[i].release_and_sweep_blocks(space);
118-
119-
// As we have swept blocks, move blocks in the unswept list to available or consumed list.
120-
while let Some(block) = self.unswept[i].pop() {
130+
for block in self.consumed[i].iter() {
121131
if block.has_free_cells() {
132+
self.consumed[i].remove(block);
122133
self.available[i].push(block);
123-
} else {
124-
self.consumed[i].push(block);
125134
}
126135
}
127136
}
@@ -299,6 +308,7 @@ impl<VM: VMBinding> MarkSweepSpace<VM> {
299308
scheduler,
300309
abandoned: Mutex::new(AbandonedBlockLists::new()),
301310
abandoned_in_gc: Mutex::new(AbandonedBlockLists::new()),
311+
pending_release_packets: AtomicUsize::new(0),
302312
}
303313
}
304314

@@ -340,30 +350,16 @@ impl<VM: VMBinding> MarkSweepSpace<VM> {
340350
}
341351

342352
pub fn release(&mut self) {
343-
if cfg!(feature = "eager_sweeping") {
344-
// For eager sweeping, we have to sweep the lists that are abandoned to these global lists.
345-
let mut abandoned = self.abandoned.lock().unwrap();
346-
abandoned.sweep(self);
347-
} else {
348-
// For lazy sweeping, we just move blocks from consumed to unswept. When an allocator tries
349-
// to use them, they will sweep the block.
350-
let mut abandoned = self.abandoned.lock().unwrap();
351-
abandoned.sweep_later(self);
352-
}
353-
}
353+
let num_mutators = VM::VMActivePlan::number_of_mutators();
354+
// all ReleaseMutator work packets plus the ReleaseMarkSweepSpace packet
355+
self.pending_release_packets
356+
.store(num_mutators + 1, Ordering::SeqCst);
354357

355-
pub fn end_of_gc(&mut self) {
356-
let from = self.abandoned_in_gc.get_mut().unwrap();
357-
let to = self.abandoned.get_mut().unwrap();
358-
to.merge(from);
359-
360-
#[cfg(debug_assertions)]
361-
from.assert_empty();
362-
363-
// BlockPageResource uses worker-local block queues to eliminate contention when releasing
364-
// blocks, similar to how the MarkSweepSpace caches blocks in `abandoned_in_gc` before
365-
// returning to the global pool. We flush the BlockPageResource, too.
366-
self.pr.flush_all();
358+
// Do work in separate work packet in order not to slow down the `Release` work packet which
359+
// blocks all `ReleaseMutator` packets.
360+
let space = unsafe { &*(self as *const Self) };
361+
let work_packet = ReleaseMarkSweepSpace { space };
362+
self.scheduler.work_buckets[crate::scheduler::WorkBucketStage::Release].add(work_packet);
367363
}
368364

369365
/// Release a block.
@@ -419,6 +415,62 @@ impl<VM: VMBinding> MarkSweepSpace<VM> {
419415
pub fn get_abandoned_block_lists_in_gc(&self) -> &Mutex<AbandonedBlockLists> {
420416
&self.abandoned_in_gc
421417
}
418+
419+
pub fn release_packet_done(&self) {
420+
let old = self.pending_release_packets.fetch_sub(1, Ordering::SeqCst);
421+
if old == 1 {
422+
if cfg!(feature = "eager_sweeping") {
423+
// When doing eager sweeping, we start sweeing now.
424+
// After sweeping, we will recycle blocks.
425+
let work_packets = self.generate_sweep_tasks();
426+
self.scheduler.work_buckets[WorkBucketStage::Release].bulk_add(work_packets);
427+
} else {
428+
// When doing lazy sweeping, we recycle blocks now.
429+
self.recycle_blocks();
430+
}
431+
}
432+
}
433+
434+
fn generate_sweep_tasks(&self) -> Vec<Box<dyn GCWork<VM>>> {
435+
let space = unsafe { &*(self as *const Self) };
436+
let epilogue = Arc::new(RecycleBlocks {
437+
space,
438+
counter: AtomicUsize::new(0),
439+
});
440+
let tasks = self.chunk_map.generate_tasks(|chunk| {
441+
Box::new(SweepChunk {
442+
space,
443+
chunk,
444+
epilogue: epilogue.clone(),
445+
})
446+
});
447+
epilogue.counter.store(tasks.len(), Ordering::SeqCst);
448+
tasks
449+
}
450+
451+
fn recycle_blocks(&self) {
452+
{
453+
let mut abandoned = self.abandoned.try_lock().unwrap();
454+
let mut abandoned_in_gc = self.abandoned_in_gc.try_lock().unwrap();
455+
456+
if cfg!(feature = "eager_sweeping") {
457+
// When doing eager sweeping, previously consumed blocks may become available after
458+
// sweeping. We recycle them.
459+
abandoned.recycle_blocks();
460+
abandoned_in_gc.recycle_blocks();
461+
}
462+
463+
abandoned.merge(&mut abandoned_in_gc);
464+
465+
#[cfg(debug_assertions)]
466+
abandoned_in_gc.assert_empty();
467+
}
468+
469+
// BlockPageResource uses worker-local block queues to eliminate contention when releasing
470+
// blocks, similar to how the MarkSweepSpace caches blocks in `abandoned_in_gc` before
471+
// returning to the global pool. We flush the BlockPageResource, too.
472+
self.pr.flush_all();
473+
}
422474
}
423475

424476
use crate::scheduler::GCWork;
@@ -454,3 +506,67 @@ impl<VM: VMBinding> GCWork<VM> for PrepareChunkMap<VM> {
454506
}
455507
}
456508
}
509+
510+
struct ReleaseMarkSweepSpace<VM: VMBinding> {
511+
space: &'static MarkSweepSpace<VM>,
512+
}
513+
514+
impl<VM: VMBinding> GCWork<VM> for ReleaseMarkSweepSpace<VM> {
515+
fn do_work(&mut self, _worker: &mut GCWorker<VM>, _mmtk: &'static MMTK<VM>) {
516+
{
517+
let mut abandoned = self.space.abandoned.lock().unwrap();
518+
abandoned.sweep_later(self.space);
519+
}
520+
521+
self.space.release_packet_done();
522+
}
523+
}
524+
525+
/// Chunk sweeping work packet. Only used by eager sweeping to sweep marked blocks after unmarked
526+
/// blocks have been released.
527+
struct SweepChunk<VM: VMBinding> {
528+
space: &'static MarkSweepSpace<VM>,
529+
chunk: Chunk,
530+
/// A destructor invoked when all `SweepChunk` packets are finished.
531+
epilogue: Arc<RecycleBlocks<VM>>,
532+
}
533+
534+
impl<VM: VMBinding> GCWork<VM> for SweepChunk<VM> {
535+
fn do_work(&mut self, _worker: &mut GCWorker<VM>, _mmtk: &'static MMTK<VM>) {
536+
assert_eq!(self.space.chunk_map.get(self.chunk), ChunkState::Allocated);
537+
538+
// number of allocated blocks.
539+
let mut allocated_blocks = 0;
540+
// Iterate over all allocated blocks in this chunk.
541+
for block in self
542+
.chunk
543+
.iter_region::<Block>()
544+
.filter(|block| block.get_state() != BlockState::Unallocated)
545+
{
546+
// We have released unmarked blocks in `ReleaseMarkSweepSpace` and `ReleaseMutator`.
547+
// We shouldn't see any unmarked blocks now.
548+
debug_assert_eq!(block.get_state(), BlockState::Marked);
549+
block.sweep::<VM>();
550+
allocated_blocks += 1;
551+
}
552+
probe!(mmtk, sweep_chunk, allocated_blocks);
553+
// Set this chunk as free if there is not live blocks.
554+
if allocated_blocks == 0 {
555+
self.space.chunk_map.set(self.chunk, ChunkState::Free)
556+
}
557+
self.epilogue.finish_one_work_packet();
558+
}
559+
}
560+
561+
struct RecycleBlocks<VM: VMBinding> {
562+
space: &'static MarkSweepSpace<VM>,
563+
counter: AtomicUsize,
564+
}
565+
566+
impl<VM: VMBinding> RecycleBlocks<VM> {
567+
fn finish_one_work_packet(&self) {
568+
if 1 == self.counter.fetch_sub(1, Ordering::SeqCst) {
569+
self.space.recycle_blocks()
570+
}
571+
}
572+
}

0 commit comments

Comments
 (0)