Commit f0f547c

Make VM lock per fiber instead of per ractor.
Having it per-ractor is weird because it means two threads in the same ractor can both acquire the lock (through the re-entrant path). I don't think that's how it was envisioned to work.

It's also currently broken if you raise an exception while the lock is held: `rb_ec_vm_lock_rec(ec)` gives you the state of the ractor's lock_rec, and that is what gets saved in `tag.lock_rec`. This value doesn't necessarily reflect the lock_rec state of the current thread or fiber (or even whether that thread or fiber locked the VM lock at all), so on unwind we can't know whether we should release the lock or how many levels to release.

I changed it to work per fiber, like Ruby mutexes. Now we can trust that the saved value is correct for each fiber, and other threads and fibers trying to acquire the VM lock will be blocked by the fiber that holds it.

There was also a "bug" (I'm not sure it can happen in practice) where acquiring the VM lock and then calling rb_fork can deadlock. After a fork we should reset ractor.sync.lock_rec to 0 and lock_owner to NULL, in case the VM lock was held above the `rb_fork` call site.
1 parent 73854a4 commit f0f547c
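The ownership model the commit moves to can be illustrated as follows. This is a minimal sketch, not the CRuby implementation: it assumes a plain pthread mutex and an opaque fiber pointer, and the names (vm_lock_sketch, sketch_lock_enter, sketch_lock_leave) are hypothetical stand-ins for the real fields and functions in the diffs below.

/* Sketch of a recursive lock keyed on a fiber pointer: re-entry is
 * only granted to the exact fiber that took the lock, and lock_rec
 * counts that fiber's nesting depth. */
#include <pthread.h>
#include <stddef.h>

typedef struct fiber fiber_t;      /* opaque; stands in for rb_fiber_t */

typedef struct {
    pthread_mutex_t lock;          /* stands in for vm->ractor.sync.lock */
    fiber_t *owner_fiber;          /* stands in for lock_owner_fiber */
    unsigned int lock_rec;         /* nesting depth for the owner fiber */
} vm_lock_sketch;

static void
sketch_lock_enter(vm_lock_sketch *vl, fiber_t *cur_f)
{
    /* Unsynchronized read, but benign: it only compares against a
     * value the current thread itself wrote while holding the lock
     * (the same reasoning as the NO_SANITIZE note in vm_core.h). */
    if (vl->owner_fiber == cur_f) {
        vl->lock_rec++;            /* re-entry by the owning fiber */
        return;
    }
    pthread_mutex_lock(&vl->lock); /* any other fiber blocks here */
    vl->owner_fiber = cur_f;
    vl->lock_rec = 1;
}

static void
sketch_lock_leave(vm_lock_sketch *vl)
{
    if (--vl->lock_rec == 0) {
        vl->owner_fiber = NULL;
        pthread_mutex_unlock(&vl->lock);
    }
}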

File tree

3 files changed: +30 -13 lines changed

thread_pthread.c

Lines changed: 9 additions & 0 deletions
@@ -1419,8 +1419,10 @@ rb_ractor_sched_barrier_start(rb_vm_t *vm, rb_ractor_t *cr)
 
     // release VM lock
     lock_rec = vm->ractor.sync.lock_rec;
+    rb_fiber_t *fiber = vm->ractor.sync.lock_owner_fiber;
     vm->ractor.sync.lock_rec = 0;
     vm->ractor.sync.lock_owner = NULL;
+    vm->ractor.sync.lock_owner_fiber = NULL;
     rb_native_mutex_unlock(&vm->ractor.sync.lock);
 
     // interrupts all running threads
@@ -1450,6 +1452,7 @@ rb_ractor_sched_barrier_start(rb_vm_t *vm, rb_ractor_t *cr)
     rb_native_mutex_lock(&vm->ractor.sync.lock);
     vm->ractor.sync.lock_rec = lock_rec;
     vm->ractor.sync.lock_owner = cr;
+    vm->ractor.sync.lock_owner_fiber = fiber;
 }
 
 // do not release ractor_sched_lock and threre is no newly added (resumed) thread
@@ -1583,6 +1586,12 @@ thread_sched_atfork(struct rb_thread_sched *sched)
     }
     vm->ractor.sched.running_cnt = 0;
 
+    vm->ractor.sync.lock_rec = 0;
+    th->ec->tag->lock_rec = 0;
+    vm->ractor.sync.lock_owner = NULL;
+    vm->ractor.sync.lock_owner_fiber = NULL;
+    rb_native_mutex_initialize(&vm->ractor.sync.lock);
+
     rb_native_mutex_initialize(&vm->ractor.sched.lock);
 #if VM_CHECK_MODE > 0
     vm->ractor.sched.lock_owner = NULL;
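The thread_sched_atfork hunk is the classic post-fork reset: after fork(2) only the forking thread survives in the child, so lock state taken by frames above the rb_fork call site (or by other threads) must be cleared rather than released. A hedged sketch of the general pattern, reusing the hypothetical vm_lock_sketch type from the sketch above:

/* Post-fork child reset (sketch; not CRuby's code). The child cannot
 * unlock a mutex on behalf of a holder that no longer exists, so it
 * forgets all ownership bookkeeping and re-creates the mutex, just as
 * the hunk does with rb_native_mutex_initialize. */
static void
sketch_lock_atfork_child(vm_lock_sketch *vl)
{
    vl->lock_rec = 0;                     /* drop any recorded nesting */
    vl->owner_fiber = NULL;               /* no owner in the child */
    pthread_mutex_init(&vl->lock, NULL);  /* fresh, unlocked mutex */
}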

vm_core.h

Lines changed: 5 additions & 4 deletions
@@ -676,6 +676,7 @@ typedef struct rb_vm_struct {
         // monitor
         rb_nativethread_lock_t lock;
         struct rb_ractor_struct *lock_owner;
+        struct rb_fiber_struct *lock_owner_fiber;
         unsigned int lock_rec;
 
         // join at exit
@@ -2073,18 +2074,18 @@ void rb_ec_vm_lock_rec_release(const rb_execution_context_t *ec,
 /* This technically is a data race, as it's checked without the lock, however we
  * check against a value only our own thread will write. */
 NO_SANITIZE("thread", static inline bool
-vm_locked_by_ractor_p(rb_vm_t *vm, rb_ractor_t *cr))
+vm_locked_by_fiber_p(rb_vm_t *vm, rb_fiber_t *cur_f))
 {
-    VM_ASSERT(cr == GET_RACTOR());
-    return vm->ractor.sync.lock_owner == cr;
+    VM_ASSERT(cur_f == GET_THREAD()->ec->fiber_ptr);
+    return vm->ractor.sync.lock_owner_fiber == cur_f;
 }
 
 static inline unsigned int
 rb_ec_vm_lock_rec(const rb_execution_context_t *ec)
 {
     rb_vm_t *vm = rb_ec_vm_ptr(ec);
 
-    if (!vm_locked_by_ractor_p(vm, rb_ec_ractor_ptr(ec))) {
+    if (!vm_locked_by_fiber_p(vm, ec->fiber_ptr)) {
         return 0;
     }
     else {
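The payoff of the per-fiber predicate is in rb_ec_vm_lock_rec above: it now answers "how many times does the current fiber hold the VM lock", returning 0 for everyone else. That is what makes the value saved in tag.lock_rec (see the commit message) trustworthy on exception unwind. A hedged sketch of that save step; the tag_sketch struct and push helper are hypothetical stand-ins, not CRuby's EC_PUSH_TAG machinery:

/* Hypothetical sketch: save the per-fiber recursion level when
 * entering a protected region, so an unwind can release exactly the
 * levels acquired after this point. Sound only because
 * rb_ec_vm_lock_rec() is 0 whenever this fiber does not own the lock. */
struct tag_sketch {
    unsigned int lock_rec;  /* stands in for tag.lock_rec */
};

static void
tag_push_sketch(const rb_execution_context_t *ec, struct tag_sketch *tag)
{
    tag->lock_rec = rb_ec_vm_lock_rec(ec);
}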

vm_sync.c

Lines changed: 16 additions & 9 deletions
@@ -12,7 +12,7 @@ void rb_ractor_sched_barrier_end(rb_vm_t *vm, rb_ractor_t *cr);
 static bool
 vm_locked(rb_vm_t *vm)
 {
-    return vm_locked_by_ractor_p(vm, GET_RACTOR());
+    return vm_locked_by_fiber_p(vm, GET_THREAD()->ec->fiber_ptr);
 }
 
 #if RUBY_DEBUG > 0
@@ -62,7 +62,7 @@ vm_need_barrier(bool no_barrier, const rb_ractor_t *cr, const rb_vm_t *vm)
 }
 
 static void
-vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsigned int *lev APPEND_LOCATION_ARGS)
+vm_lock_enter(rb_ractor_t *cr, rb_fiber_t *fiber, rb_vm_t *vm, bool locked, bool no_barrier, unsigned int *lev APPEND_LOCATION_ARGS)
 {
     RUBY_DEBUG_LOG2(file, line, "start locked:%d", locked);
 
@@ -77,6 +77,7 @@ vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsigned int *lev APPEND_LOCATION_ARGS)
         // lock
         rb_native_mutex_lock(&vm->ractor.sync.lock);
         VM_ASSERT(vm->ractor.sync.lock_owner == NULL);
+        VM_ASSERT(vm->ractor.sync.lock_owner_fiber == NULL);
         VM_ASSERT(vm->ractor.sync.lock_rec == 0);
 
         // barrier
@@ -93,7 +94,9 @@ vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsigned int *lev APPEND_LOCATION_ARGS)
 
         VM_ASSERT(vm->ractor.sync.lock_rec == 0);
         VM_ASSERT(vm->ractor.sync.lock_owner == NULL);
+        VM_ASSERT(vm->ractor.sync.lock_owner_fiber == NULL);
         vm->ractor.sync.lock_owner = cr;
+        vm->ractor.sync.lock_owner_fiber = fiber;
     }
 
     vm->ractor.sync.lock_rec++;
@@ -130,6 +133,7 @@ vm_lock_leave(rb_vm_t *vm, bool no_barrier, unsigned int *lev APPEND_LOCATION_ARGS)
 
     if (vm->ractor.sync.lock_rec == 0) {
         vm->ractor.sync.lock_owner = NULL;
+        vm->ractor.sync.lock_owner_fiber = NULL;
         rb_native_mutex_unlock(&vm->ractor.sync.lock);
     }
 }
@@ -139,10 +143,10 @@ rb_vm_lock_enter_body(unsigned int *lev APPEND_LOCATION_ARGS)
 {
     rb_vm_t *vm = GET_VM();
     if (vm_locked(vm)) {
-        vm_lock_enter(NULL, vm, true, false, lev APPEND_LOCATION_PARAMS);
+        vm_lock_enter(NULL, NULL, vm, true, false, lev APPEND_LOCATION_PARAMS);
     }
     else {
-        vm_lock_enter(GET_RACTOR(), vm, false, false, lev APPEND_LOCATION_PARAMS);
+        vm_lock_enter(GET_RACTOR(), GET_THREAD()->ec->fiber_ptr, vm, false, false, lev APPEND_LOCATION_PARAMS);
     }
 }
 
@@ -151,18 +155,18 @@ rb_vm_lock_enter_body_nb(unsigned int *lev APPEND_LOCATION_ARGS)
 {
     rb_vm_t *vm = GET_VM();
     if (vm_locked(vm)) {
-        vm_lock_enter(NULL, vm, true, true, lev APPEND_LOCATION_PARAMS);
+        vm_lock_enter(NULL, NULL, vm, true, true, lev APPEND_LOCATION_PARAMS);
     }
     else {
-        vm_lock_enter(GET_RACTOR(), vm, false, true, lev APPEND_LOCATION_PARAMS);
+        vm_lock_enter(GET_RACTOR(), GET_THREAD()->ec->fiber_ptr, vm, false, true, lev APPEND_LOCATION_PARAMS);
     }
 }
 
 void
 rb_vm_lock_enter_body_cr(rb_ractor_t *cr, unsigned int *lev APPEND_LOCATION_ARGS)
 {
     rb_vm_t *vm = GET_VM();
-    vm_lock_enter(cr, vm, vm_locked(vm), false, lev APPEND_LOCATION_PARAMS);
+    vm_lock_enter(cr, GET_THREAD()->ec->fiber_ptr, vm, vm_locked(vm), false, lev APPEND_LOCATION_PARAMS);
 }
 
 void
@@ -174,7 +178,7 @@ rb_vm_lock_leave_body_nb(unsigned int *lev APPEND_LOCATION_ARGS)
 void
 rb_vm_lock_leave_body(unsigned int *lev APPEND_LOCATION_ARGS)
 {
-    vm_lock_leave(GET_VM(), false, lev APPEND_LOCATION_PARAMS);
+    vm_lock_leave(GET_VM(), false, lev APPEND_LOCATION_PARAMS);
 }
 
 void
@@ -183,7 +187,7 @@ rb_vm_lock_body(LOCATION_ARGS)
     rb_vm_t *vm = GET_VM();
     ASSERT_vm_unlocking();
 
-    vm_lock_enter(GET_RACTOR(), vm, false, false, &vm->ractor.sync.lock_rec APPEND_LOCATION_PARAMS);
+    vm_lock_enter(GET_RACTOR(), GET_THREAD()->ec->fiber_ptr, vm, false, false, &vm->ractor.sync.lock_rec APPEND_LOCATION_PARAMS);
 }
 
 void
@@ -201,9 +205,11 @@ vm_cond_wait(rb_vm_t *vm, rb_nativethread_cond_t *cond, unsigned long msec)
     ASSERT_vm_locking();
     unsigned int lock_rec = vm->ractor.sync.lock_rec;
     rb_ractor_t *cr = vm->ractor.sync.lock_owner;
+    rb_fiber_t *fiber = vm->ractor.sync.lock_owner_fiber;
 
     vm->ractor.sync.lock_rec = 0;
     vm->ractor.sync.lock_owner = NULL;
+    vm->ractor.sync.lock_owner_fiber = NULL;
     if (msec > 0) {
         rb_native_cond_timedwait(cond, &vm->ractor.sync.lock, msec);
     }
@@ -212,6 +218,7 @@ vm_cond_wait(rb_vm_t *vm, rb_nativethread_cond_t *cond, unsigned long msec)
     }
     vm->ractor.sync.lock_rec = lock_rec;
     vm->ractor.sync.lock_owner = cr;
+    vm->ractor.sync.lock_owner_fiber = fiber;
 }
 
 void
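vm_cond_wait above follows the standard pattern for waiting on a condition variable while holding a recursive lock: stash the recursion bookkeeping, present the lock as unowned so the wait can release the raw mutex to other fibers, then restore everything on wakeup. A generic sketch of the same pattern, again using the hypothetical vm_lock_sketch type from the first sketch:

/* Condvar wait under a recursive lock (sketch, not CRuby's code). */
static void
sketch_cond_wait(vm_lock_sketch *vl, pthread_cond_t *cond)
{
    unsigned int saved_rec = vl->lock_rec;   /* stash nesting depth */
    fiber_t *saved_owner = vl->owner_fiber;  /* stash owner */
    vl->lock_rec = 0;
    vl->owner_fiber = NULL;

    /* pthread_cond_wait atomically releases vl->lock while sleeping and
     * re-acquires it on wakeup; other fibers may own it in between */
    pthread_cond_wait(cond, &vl->lock);

    vl->lock_rec = saved_rec;                /* restore our ownership */
    vl->owner_fiber = saved_owner;
}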
