Skip to content

Commit a229ffd

Browse files
Make shared control block agnostic to 32-bit/64-bit.
We need to support cross-architecture host/child process. Use Futex on Linux which is a fixed 32-bit lock mechanism, unlike pthreads.
1 parent 3b058d5 commit a229ffd

10 files changed

+240
-59
lines changed

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,18 @@ if (WIN32)
5858
target_sources(fossilize PRIVATE fossilize_external_replayer_windows.hpp)
5959
else()
6060
target_sources(fossilize PRIVATE fossilize_external_replayer_linux.hpp)
61+
if (APPLE)
62+
target_sources(fossilize PRIVATE platform/gcc_clang_spinlock.hpp)
63+
else()
64+
target_sources(fossilize PRIVATE platform/futex_wrapper_linux.hpp)
65+
endif()
6166
endif()
6267

6368
target_include_directories(fossilize PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
6469
target_compile_options(fossilize PRIVATE ${FOSSILIZE_CXX_FLAGS})
6570
if (NOT WIN32)
6671
target_link_libraries(fossilize -pthread)
72+
target_compile_options(fossilize PUBLIC -pthread)
6773
if (NOT APPLE)
6874
if (ANDROID)
6975
target_link_libraries(fossilize android log)

cli/fossilize_replay_linux.hpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include <limits.h>
3636
#include <errno.h>
3737
#include "fossilize_external_replayer.hpp"
38+
#include "platform/futex_wrapper_linux.hpp"
3839

3940
static bool write_all(int fd, const char *str)
4041
{
@@ -133,9 +134,9 @@ void ProcessProgress::parse(const char *cmd)
133134
char buffer[ControlBlockMessageSize] = {};
134135
strcpy(buffer, cmd);
135136

136-
pthread_mutex_lock(&Global::control_block->lock);
137+
futex_wrapper_lock(&Global::control_block->futex_lock);
137138
shared_control_block_write(Global::control_block, buffer, sizeof(buffer));
138-
pthread_mutex_unlock(&Global::control_block->lock);
139+
futex_wrapper_unlock(&Global::control_block->futex_lock);
139140
}
140141
}
141142
else
@@ -405,7 +406,7 @@ static int run_master_process(const VulkanDevice::Options &opts,
405406
}
406407

407408
if (Global::control_block)
408-
Global::control_block->progress_started.store(true, std::memory_order_release);
409+
Global::control_block->progress_started.store(1, std::memory_order_release);
409410

410411
Global::active_processes = 0;
411412

@@ -570,7 +571,7 @@ static int run_master_process(const VulkanDevice::Options &opts,
570571
}
571572

572573
if (Global::control_block)
573-
Global::control_block->progress_complete.store(true, std::memory_order_release);
574+
Global::control_block->progress_complete.store(1, std::memory_order_release);
574575

575576
return EXIT_SUCCESS;
576577
}
@@ -723,11 +724,11 @@ static int run_slave_process(const VulkanDevice::Options &opts,
723724
#if 0
724725
if (Global::control_block)
725726
{
726-
pthread_mutex_lock(&Global::control_block->lock);
727+
futex_wrapper_lock(&Global::control_block->futex_lock);
727728
char msg[ControlBlockMessageSize] = {};
728729
sprintf(msg, "SLAVE_FINISHED\n");
729730
shared_control_block_write(Global::control_block, msg, sizeof(msg));
730-
pthread_mutex_unlock(&Global::control_block->lock);
731+
futex_wrapper_unlock(&Global::control_block->futex_lock);
731732
}
732733
#endif
733734

cli/fossilize_replay_windows.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -593,7 +593,7 @@ static int run_master_process(const VulkanDevice::Options &opts,
593593
}
594594

595595
if (Global::control_block)
596-
Global::control_block->progress_started.store(true, std::memory_order_release);
596+
Global::control_block->progress_started.store(1, std::memory_order_release);
597597

598598
Global::active_processes = 0;
599599
vector<ProcessProgress> child_processes(processes);
@@ -700,6 +700,10 @@ static int run_master_process(const VulkanDevice::Options &opts,
700700

701701
if (Global::job_handle)
702702
CloseHandle(Global::job_handle);
703+
704+
if (Global::control_block)
705+
Global::control_block->progress_complete.store(1, std::memory_order_release);
706+
703707
return EXIT_SUCCESS;
704708
}
705709

fossilize_external_replayer_control_block.hpp

Lines changed: 24 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,6 @@
2323
#pragma once
2424

2525
#include <string.h>
26-
#ifdef _WIN32
27-
#define WIN32_LEAN_AND_MEAN
28-
#include <windows.h>
29-
#else
30-
#include <pthread.h>
31-
#endif
32-
3326
#include <atomic>
3427
static_assert(sizeof(std::atomic<uint32_t>) == sizeof(uint32_t), "Atomic size mismatch. This type likely requires a lock to work.");
3528

@@ -39,17 +32,16 @@ static_assert(sizeof(std::atomic<uint32_t>) == sizeof(uint32_t), "Atomic size mi
3932
namespace Fossilize
4033
{
4134
enum { ControlBlockMessageSize = 32 };
42-
enum { ControlBlockMagic = 0x19bcde14 };
35+
enum { ControlBlockMagic = 0x19bcde15 };
4336

4437
struct SharedControlBlock
4538
{
4639
uint32_t version_cookie;
4740

48-
#ifndef _WIN32
49-
pthread_mutex_t lock;
50-
#endif
41+
// Used to implement a lock (or spinlock).
42+
int futex_lock;
5143

52-
// Progress. Just need atomics to implements this.
44+
// Progress. Just need atomics to implement this.
5345
std::atomic<uint32_t> successful_modules;
5446
std::atomic<uint32_t> successful_graphics;
5547
std::atomic<uint32_t> successful_compute;
@@ -64,30 +56,29 @@ struct SharedControlBlock
6456
std::atomic<uint32_t> total_modules;
6557
std::atomic<uint32_t> banned_modules;
6658
std::atomic<uint32_t> module_validation_failures;
67-
std::atomic<bool> progress_started;
68-
std::atomic<bool> progress_complete;
59+
std::atomic<uint32_t> progress_started;
60+
std::atomic<uint32_t> progress_complete;
6961

7062
// Ring buffer. Needs lock.
71-
uint64_t write_count;
72-
uint64_t read_count;
73-
74-
size_t read_offset;
75-
size_t write_offset;
76-
size_t ring_buffer_offset;
77-
size_t ring_buffer_size;
63+
uint32_t write_count;
64+
uint32_t read_count;
65+
uint32_t read_offset;
66+
uint32_t write_offset;
67+
uint32_t ring_buffer_offset;
68+
uint32_t ring_buffer_size;
7869
};
7970

8071
// These are not thread-safe. Need to lock them by external means.
81-
static inline size_t shared_control_block_read_avail(SharedControlBlock *control_block)
72+
static inline uint32_t shared_control_block_read_avail(SharedControlBlock *control_block)
8273
{
83-
size_t ret = control_block->write_count - control_block->read_count;
74+
uint32_t ret = control_block->write_count - control_block->read_count;
8475
return ret;
8576
}
8677

87-
static inline size_t shared_control_block_write_avail(SharedControlBlock *control_block)
78+
static inline uint32_t shared_control_block_write_avail(SharedControlBlock *control_block)
8879
{
89-
size_t ret = 0;
90-
size_t max_capacity_write_count = control_block->read_count + control_block->ring_buffer_size;
80+
uint32_t ret = 0;
81+
uint32_t max_capacity_write_count = control_block->read_count + control_block->ring_buffer_size;
9182
if (control_block->write_count >= max_capacity_write_count)
9283
ret = 0;
9384
else
@@ -96,7 +87,7 @@ static inline size_t shared_control_block_write_avail(SharedControlBlock *contro
9687
}
9788

9889
static inline bool shared_control_block_read(SharedControlBlock *control_block,
99-
void *data_, size_t size)
90+
void *data_, uint32_t size)
10091
{
10192
auto *data = static_cast<uint8_t *>(data_);
10293
const uint8_t *ring = reinterpret_cast<const uint8_t *>(control_block) + control_block->ring_buffer_offset;
@@ -107,8 +98,8 @@ static inline bool shared_control_block_read(SharedControlBlock *control_block,
10798
if (size > (control_block->write_count - control_block->read_count))
10899
return false;
109100

110-
size_t read_first = control_block->ring_buffer_size - control_block->read_offset;
111-
size_t read_second = 0;
101+
uint32_t read_first = control_block->ring_buffer_size - control_block->read_offset;
102+
uint32_t read_second = 0;
112103
if (read_first > size)
113104
read_first = size;
114105
read_second = size - read_first;
@@ -123,20 +114,20 @@ static inline bool shared_control_block_read(SharedControlBlock *control_block,
123114
}
124115

125116
static inline bool shared_control_block_write(SharedControlBlock *control_block,
126-
const void *data_, size_t size)
117+
const void *data_, uint32_t size)
127118
{
128119
auto *data = static_cast<const uint8_t *>(data_);
129120
uint8_t *ring = reinterpret_cast<uint8_t *>(control_block) + control_block->ring_buffer_offset;
130121

131122
if (size > control_block->ring_buffer_size)
132123
return false;
133124

134-
size_t max_capacity_write_count = control_block->read_count + control_block->ring_buffer_size;
125+
uint32_t max_capacity_write_count = control_block->read_count + control_block->ring_buffer_size;
135126
if (control_block->write_count + size > max_capacity_write_count)
136127
return false;
137128

138-
size_t write_first = control_block->ring_buffer_size - control_block->write_offset;
139-
size_t write_second = 0;
129+
uint32_t write_first = control_block->ring_buffer_size - control_block->write_offset;
130+
uint32_t write_second = 0;
140131
if (write_first > size)
141132
write_first = size;
142133
write_second = size - write_first;

fossilize_external_replayer_linux.hpp

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,17 @@
3636
#include <vector>
3737
#include <unordered_set>
3838
#include <string>
39-
#include <pthread.h>
4039
#include <signal.h>
4140
#include <limits.h>
4241
#include "path.hpp"
4342
#include "fossilize_external_replayer_control_block.hpp"
4443

44+
#ifdef __linux__
45+
#include "platform/futex_wrapper_linux.hpp"
46+
#else
47+
#include "platform/gcc_clang_spinlock.hpp"
48+
#endif
49+
4550
namespace Fossilize
4651
{
4752
static std::atomic<int32_t> shm_index;
@@ -74,10 +79,7 @@ ExternalReplayer::Impl::~Impl()
7479
close(fd);
7580

7681
if (shm_block)
77-
{
78-
pthread_mutex_destroy(&shm_block->lock);
7982
munmap(shm_block, shm_block_size);
80-
}
8183
}
8284

8385
uintptr_t ExternalReplayer::Impl::get_process_handle() const
@@ -90,9 +92,9 @@ ExternalReplayer::PollResult ExternalReplayer::Impl::poll_progress(ExternalRepla
9092
if (pid < 0)
9193
return ExternalReplayer::PollResult::Error;
9294

93-
bool complete = shm_block->progress_complete.load(std::memory_order_acquire);
95+
bool complete = shm_block->progress_complete.load(std::memory_order_acquire) != 0;
9496

95-
if (!shm_block->progress_started.load(std::memory_order_acquire))
97+
if (shm_block->progress_started.load(std::memory_order_acquire) == 0)
9698
return ExternalReplayer::PollResult::ResultNotReady;
9799

98100
progress.compute.total = shm_block->total_compute.load(std::memory_order_relaxed);
@@ -110,15 +112,15 @@ ExternalReplayer::PollResult ExternalReplayer::Impl::poll_progress(ExternalRepla
110112
progress.clean_crashes = shm_block->clean_process_deaths.load(std::memory_order_relaxed);
111113
progress.dirty_crashes = shm_block->dirty_process_deaths.load(std::memory_order_relaxed);
112114

113-
pthread_mutex_lock(&shm_block->lock);
115+
futex_wrapper_lock(&shm_block->futex_lock);
114116
size_t read_avail = shared_control_block_read_avail(shm_block);
115117
for (size_t i = ControlBlockMessageSize; i <= read_avail; i += ControlBlockMessageSize)
116118
{
117119
char buf[ControlBlockMessageSize] = {};
118120
shared_control_block_read(shm_block, buf, sizeof(buf));
119121
parse_message(buf);
120122
}
121-
pthread_mutex_unlock(&shm_block->lock);
123+
futex_wrapper_unlock(&shm_block->futex_lock);
122124
return complete ? ExternalReplayer::PollResult::Complete : ExternalReplayer::PollResult::Running;
123125
}
124126

@@ -241,15 +243,6 @@ bool ExternalReplayer::Impl::start(const ExternalReplayer::Options &options)
241243
shm_block->ring_buffer_size = 64 * 1024;
242244
shm_block->ring_buffer_offset = 4 * 1024;
243245

244-
pthread_mutexattr_t attr;
245-
if (pthread_mutexattr_init(&attr) < 0)
246-
return false;
247-
if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) < 0)
248-
return false;
249-
if (pthread_mutex_init(&shm_block->lock, &attr) < 0)
250-
return false;
251-
pthread_mutexattr_destroy(&attr);
252-
253246
// We need to let our child inherit the shared FD.
254247
int current_flags = fcntl(fd, F_GETFD);
255248
if (current_flags < 0)

fossilize_external_replayer_windows.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,9 @@ ExternalReplayer::PollResult ExternalReplayer::Impl::poll_progress(ExternalRepla
8383
if (!process)
8484
return ExternalReplayer::PollResult::Error;
8585

86-
bool complete = shm_block->progress_complete.load(std::memory_order_acquire);
86+
bool complete = shm_block->progress_complete.load(std::memory_order_acquire) != 0;
8787

88-
if (!shm_block->progress_started.load(std::memory_order_acquire))
88+
if (shm_block->progress_started.load(std::memory_order_acquire) == 0)
8989
return ExternalReplayer::PollResult::ResultNotReady;
9090

9191
progress.compute.total = shm_block->total_compute.load(std::memory_order_relaxed);

platform/futex_wrapper_linux.hpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/* Copyright (c) 2019 Hans-Kristian Arntzen
2+
*
3+
* Permission is hereby granted, free of charge, to any person obtaining
4+
* a copy of this software and associated documentation files (the
5+
* "Software"), to deal in the Software without restriction, including
6+
* without limitation the rights to use, copy, modify, merge, publish,
7+
* distribute, sublicense, and/or sell copies of the Software, and to
8+
* permit persons to whom the Software is furnished to do so, subject to
9+
* the following conditions:
10+
*
11+
* The above copyright notice and this permission notice shall be
12+
* included in all copies or substantial portions of the Software.
13+
*
14+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17+
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18+
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19+
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20+
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21+
*/
22+
23+
#pragma once
24+
25+
#include <unistd.h>
26+
#include <sys/syscall.h>
27+
#include <linux/futex.h>
28+
29+
// Implementation based on https://eli.thegreenplace.net/2018/basics-of-futexes/ and
30+
// "Futexes are Tricky" by Ulrich Drepper.
31+
// Kind of overkill, but we need a mutex which can work cross-process and cross-architecture via shared memory (32-bit and 64-bit).
32+
// Alternative is full kernel semaphores or raw spinlocks.
33+
34+
namespace Fossilize
35+
{
36+
static inline int cmpxchg(int *value, int expected_value, int new_value)
37+
{
38+
int ret = __sync_val_compare_and_swap(value, expected_value, new_value);
39+
return ret;
40+
}
41+
42+
static inline void futex_wrapper_lock(int *lock)
43+
{
44+
int c = cmpxchg(lock, 0, 1);
45+
if (c != 0)
46+
{
47+
// Contention.
48+
do
49+
{
50+
// Need to lock. Force *lock to be 2.
51+
if (c == 2 || cmpxchg(lock, 1, 2) != 0)
52+
{
53+
// If *lock is 2 (was not unlocked somehow by other thread),
54+
// wait until it's woken up.
55+
syscall(SYS_futex, lock, FUTEX_WAIT, 2, 0, 0, 0);
56+
}
57+
} while ((c = cmpxchg(lock, 0, 2)) != 0);
58+
}
59+
}
60+
61+
static inline void futex_wrapper_unlock(int *lock)
62+
{
63+
int c = __sync_sub_and_fetch(lock, 1);
64+
if (c == 1)
65+
{
66+
// We have some waiters to wake up.
67+
68+
// Atomic store, really, but there's no __sync variant for that.
69+
__sync_fetch_and_and(lock, 0);
70+
71+
syscall(SYS_futex, lock, FUTEX_WAKE, 1, 0, 0, 0);
72+
}
73+
}
74+
}

0 commit comments

Comments
 (0)