Skip to content

Commit 0d5c7a4

Browse files
committed May 7, 2024
Add low-memory-footprint mutex
The pthread_mutex_t provided by the POSIX Threads API offers robust features, such as integration with condition variables and graceful handling of contention. However, on 64-bit Linux systems, each pthread_mutex_t instance occupies 40 bytes of memory. When safeguarding small data structures, this substantial memory footprint can lead to inefficient memory usage and reduced cache effectiveness, potentially impacting performance.
1 parent a311be8 commit 0d5c7a4

File tree

14 files changed

+1012
-0
lines changed

14 files changed

+1012
-0
lines changed
 

‎README.md

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ purpose of these programs is to be illustrative and educational.
1414
- [tpool](tpool/): A lightweight thread pool.
1515
- [refcnt](refcnt/): A generic reference counting.
1616
- [work-steal](work-steal/): A work-stealing scheduler.
17+
- [mutex](mutex/): A low-memory-footprint mutex.
1718
* [Producer–consumer problem](https://en.wikipedia.org/wiki/Producer%E2%80%93consumer_problem)
1819
- [spmc](spmc/): A concurrent single-producer/multiple-consumer queue.
1920
- [mpsc](mpsc/): An unbounded lockless single-consumer/multiple-producer FIFO queue.

‎mutex/atomic.h

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#pragma once
2+
3+
#include <stdatomic.h>
4+
5+
/*
 * Short aliases for the C11 <stdatomic.h> explicit-order operations.
 *
 * In every wrapper the 'order' (or 'succ' / 'fail') argument is the suffix
 * of a memory_order_ constant and is pasted on by the preprocessor, so
 * load(&x, relaxed) expands to atomic_load_explicit(&x, memory_order_relaxed).
 * 'obj' is passed through unchanged as the pointer argument of the
 * underlying atomic_*_explicit call.
 */
#define atomic _Atomic
6+
7+
#define load(obj, order) atomic_load_explicit(obj, memory_order_##order)
8+
9+
#define store(obj, value, order) \
10+
atomic_store_explicit(obj, value, memory_order_##order)
11+
12+
#define exchange(obj, value, order) \
13+
atomic_exchange_explicit(obj, value, memory_order_##order)
14+
15+
#define compare_exchange_weak(obj, expected, desired, succ, fail) \
16+
atomic_compare_exchange_weak_explicit( \
17+
obj, expected, desired, memory_order_##succ, memory_order_##fail)
18+
19+
#define compare_exchange_strong(obj, expected, desired, succ, fail) \
20+
atomic_compare_exchange_strong_explicit( \
21+
obj, expected, desired, memory_order_##succ, memory_order_##fail)
22+
23+
#define fetch_add(obj, arg, order) \
24+
atomic_fetch_add_explicit(obj, arg, memory_order_##order)
25+
26+
#define fetch_sub(obj, arg, order) \
27+
atomic_fetch_sub_explicit(obj, arg, memory_order_##order)
28+
29+
#define fetch_or(obj, arg, order) \
30+
atomic_fetch_or_explicit(obj, arg, memory_order_##order)
31+
32+
#define fetch_xor(obj, arg, order) \
33+
atomic_fetch_xor_explicit(obj, arg, memory_order_##order)
34+
35+
#define fetch_and(obj, arg, order) \
36+
atomic_fetch_and_explicit(obj, arg, memory_order_##order)
37+
38+
/*
 * ThreadSanitizer does not support atomic_thread_fence().
 *
 * TSAN expands to the compiler's "built with ThreadSanitizer" indicator:
 * __has_feature(thread_sanitizer) when the compiler provides __has_feature,
 * otherwise the __SANITIZE_THREAD__ macro (an undefined identifier evaluates
 * to 0 inside #if).
 *
 * thread_fence(obj, order):
 *   - under TSAN: an atomic fetch_add of 0 on 'obj' with the given order,
 *     used in place of the fence;
 *   - otherwise: atomic_thread_fence(memory_order_<order>); 'obj' is not
 *     used in this expansion.
 */
39+
#ifdef __has_feature
40+
#define TSAN __has_feature(thread_sanitizer)
41+
#else
42+
#define TSAN __SANITIZE_THREAD__
43+
#endif
44+
45+
#if TSAN
46+
#define thread_fence(obj, order) fetch_add(obj, 0, order)
47+
#else
48+
#define thread_fence(obj, order) atomic_thread_fence(memory_order_##order)
49+
#endif

‎mutex/cond.h

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#pragma once
2+
3+
#if USE_PTHREADS
4+
5+
#include <pthread.h>
6+
7+
/*
 * USE_PTHREADS backend: the cond_* API maps directly onto pthread condition
 * variables. cond_signal() and cond_broadcast() accept a mutex argument 'm'
 * so that call sites are identical for both backends; these expansions
 * ignore it.
 */
#define cond_t pthread_cond_t
8+
#define cond_init(c) pthread_cond_init(c, NULL)
9+
#define COND_INITIALIZER PTHREAD_COND_INITIALIZER
10+
#define cond_wait(c, m) pthread_cond_wait(c, m)
11+
#define cond_signal(c, m) pthread_cond_signal(c)
12+
#define cond_broadcast(c, m) pthread_cond_broadcast(c)
13+
14+
#else
15+
16+
#include <limits.h>
17+
#include <stddef.h>
18+
#include "atomic.h"
19+
#include "futex.h"
20+
#include "mutex.h"
21+
#include "spinlock.h"
22+
23+
/*
 * Futex-based condition variable.
 *
 * seq is a sequence counter: cond_signal()/cond_broadcast() increment it,
 * and cond_wait() sleeps on it (as the futex word) until it differs from the
 * value observed before the mutex was released.
 */
typedef struct {
24+
atomic int seq;
25+
} cond_t;
26+
27+
/* Initialize 'cond' with its sequence counter set to 0. */
static inline void cond_init(cond_t *cond)
28+
{
29+
atomic_init(&cond->seq, 0);
30+
}
31+
32+
/*
 * Release 'mutex', wait for cond->seq to change, then re-acquire 'mutex'.
 *
 * The caller must hold 'mutex' on entry; it is held again on return.
 * The sequence number is sampled before the mutex is released, so an
 * increment that happens after the unlock is still detected, either by the
 * spin loop or by futex_wait() refusing to sleep on a changed value.
 *
 * The sequence number is not re-checked after futex_wait() returns, so
 * callers should re-test their predicate in a loop around cond_wait().
 */
static inline void cond_wait(cond_t *cond, mutex_t *mutex)
33+
{
34+
/* Snapshot taken while the mutex is still held. */
int seq = load(&cond->seq, relaxed);
35+
36+
mutex_unlock(mutex);
37+
38+
/* Number of polls of cond->seq before falling back to futex_wait(). */
#define COND_SPINS 128
39+
for (int i = 0; i < COND_SPINS; ++i) {
40+
if (load(&cond->seq, relaxed) != seq) {
41+
/* Sequence advanced while spinning: no need to sleep. */
mutex_lock(mutex);
42+
return;
43+
}
44+
spin_hint();
45+
}
46+
47+
/* Sleeps only if cond->seq still equals the snapshot. */
futex_wait(&cond->seq, seq);
48+
49+
mutex_lock(mutex);
50+
51+
/*
 * NOTE(review): presumably marks the mutex as having sleepers because
 * cond_broadcast() may have requeued other waiters onto mutex->state;
 * confirm against the MUTEX_SLEEPING handling in mutex.h.
 */
fetch_or(&mutex->state, MUTEX_SLEEPING, relaxed);
52+
}
53+
54+
/*
 * Wake at most one thread blocked in cond_wait() on 'cond'.
 *
 * 'mutex' is not used by this implementation; the parameter keeps the
 * signature the same as the USE_PTHREADS macro of the same name.
 */
static inline void cond_signal(cond_t *cond, mutex_t *mutex)
55+
{
56+
fetch_add(&cond->seq, 1, relaxed); /* advance the sequence waiters compare against */
57+
futex_wake(&cond->seq, 1); /* wake up to one sleeper on the sequence word */
58+
}
59+
60+
/*
 * Release every thread blocked in cond_wait() on 'cond'.
 *
 * Rather than waking all sleepers at once, one is woken and the rest are
 * moved from the condition's futex onto the futex at &mutex->state
 * (see futex_requeue()).
 */
static inline void cond_broadcast(cond_t *cond, mutex_t *mutex)
61+
{
62+
fetch_add(&cond->seq, 1, relaxed); /* advance the sequence waiters compare against */
63+
futex_requeue(&cond->seq, 1, &mutex->state); /* wake one, requeue the others onto the mutex */
64+
}
65+
66+
#endif

‎mutex/example/Makefile

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
CFLAGS := -I.. -std=c11 -Wall -g -O2 -D_GNU_SOURCE -fsanitize=thread
2+
LDFLAGS := -lpthread
3+
4+
ALL := test_pthread test_linux
5+
6+
all: $(ALL)
7+
.PHONY: all
8+
9+
# Both test binaries are built from the same main.c; the target-specific
# CFLAGS below select the backend via -DUSE_PTHREADS or -DUSE_LINUX.
# Only main.c is listed as a prerequisite, so editing a header alone does
# not trigger a rebuild.
test_%: main.c
10+
$(CC) $(CFLAGS) main.c -o $@ $(LDFLAGS)
11+
12+
test_pthread: CFLAGS += -DUSE_PTHREADS
13+
test_linux: CFLAGS += -DUSE_LINUX
14+
15+
# Test suite
16+
# Host OS name; the ifeq below tests UNAME_S to choose the printf command.
UNAME_S := $(shell uname -s)
17+
ifeq ($(UNAME_S),Darwin)
18+
PRINTF = printf
19+
else
20+
PRINTF = env printf
21+
endif
22+
PASS_COLOR = \e[32;01m
23+
NO_COLOR = \e[0m
24+
notice = $(PRINTF) "$(PASS_COLOR)$(strip $1)$(NO_COLOR)\n"
25+
26+
# Run every test binary in turn. "Running <name> ... " is printed first and
# the coloured "[OK]" from $(notice) follows only when the binary exits
# successfully; a failing binary prints nothing further and the loop moves
# on to the next one.
check: $(ALL)
27+
@$(foreach t,$^,\
28+
$(PRINTF) "Running $(t) ... "; \
29+
./$(t) && $(call notice, [OK]); \
30+
)
31+
32+
clean:
33+
$(RM) $(ALL)
34+
.PHONY: clean
35+
36+
-include $(OUT)/*.d

‎mutex/example/main.c

+134
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
#include <pthread.h>
2+
#include <stdbool.h>
3+
#include <stdlib.h>
4+
5+
#include "cond.h"
6+
#include "futex.h"
7+
#include "mutex.h"
8+
9+
/*
 * A shared tick counter that threads can wait on.
 *
 * 'ticks' is read and written with 'mutex' held; 'cond' is broadcast after
 * every change. A negative 'ticks' means the clock has been stopped
 * (clock_stop() stores -1).
 */
struct clock {
10+
mutex_t mutex;
11+
cond_t cond;
12+
int ticks;
13+
};
14+
15+
/* Initialize the clock's mutex and condition variable and set ticks to 0. */
static void clock_init(struct clock *clock)
16+
{
17+
mutex_init(&clock->mutex, NULL);
18+
cond_init(&clock->cond);
19+
clock->ticks = 0;
20+
}
21+
22+
/*
 * Block until the clock has reached at least 'ticks' or has been stopped.
 *
 * Returns true if clock->ticks >= ticks when the wait ends, false if the
 * wait ended because the clock was stopped (ticks went negative).
 */
static bool clock_wait(struct clock *clock, int ticks)
23+
{
24+
mutex_lock(&clock->mutex);
25+
/* Keep waiting while the clock is running but has not reached the target. */
while (clock->ticks >= 0 && clock->ticks < ticks)
26+
cond_wait(&clock->cond, &clock->mutex);
27+
bool ret = clock->ticks >= ticks;
28+
mutex_unlock(&clock->mutex);
29+
return ret;
30+
}
31+
32+
/*
 * Advance the clock by one tick, unless it has been stopped, and wake all
 * waiters. The broadcast is issued after the mutex has been released.
 */
static void clock_tick(struct clock *clock)
33+
{
34+
mutex_lock(&clock->mutex);
35+
/* A negative count means stopped; leave it untouched. */
if (clock->ticks >= 0)
36+
++clock->ticks;
37+
mutex_unlock(&clock->mutex);
38+
cond_broadcast(&clock->cond, &clock->mutex);
39+
}
40+
41+
/*
 * Stop the clock by setting ticks to -1 and wake all waiters, so that
 * pending and future clock_wait() calls return false.
 */
static void clock_stop(struct clock *clock)
42+
{
43+
mutex_lock(&clock->mutex);
44+
clock->ticks = -1;
45+
mutex_unlock(&clock->mutex);
46+
cond_broadcast(&clock->cond, &clock->mutex);
47+
}
48+
49+
/*
 * A node in a computation graph.
 *
 * clock  - clock shared by all nodes.
 * parent - node this one waits on in thread_func(), or NULL for none.
 * ready  - flag set by node_signal() and consumed by node_wait(); accessed
 *          with 'mutex' held and announced through 'cond'.
 */
50+
struct node {
51+
struct clock *clock;
52+
struct node *parent;
53+
mutex_t mutex;
54+
cond_t cond;
55+
bool ready;
56+
};
57+
58+
/*
 * Initialize 'node' with the given clock and parent (which may be NULL),
 * set up its mutex and condition variable, and clear its ready flag.
 */
static void node_init(struct clock *clock,
59+
struct node *parent,
60+
struct node *node)
61+
{
62+
node->clock = clock;
63+
node->parent = parent;
64+
mutex_init(&node->mutex, NULL);
65+
cond_init(&node->cond);
66+
node->ready = false;
67+
}
68+
69+
/*
 * Block until 'node' is marked ready, then clear the flag so that the next
 * node_wait() blocks until the following node_signal().
 */
static void node_wait(struct node *node)
70+
{
71+
mutex_lock(&node->mutex);
72+
while (!node->ready)
73+
cond_wait(&node->cond, &node->mutex);
74+
/* Consume the signal. */
node->ready = false;
75+
mutex_unlock(&node->mutex);
76+
}
77+
78+
/*
 * Mark 'node' ready and wake one thread waiting on it. The signal is issued
 * after the mutex has been released.
 */
static void node_signal(struct node *node)
79+
{
80+
mutex_lock(&node->mutex);
81+
node->ready = true;
82+
mutex_unlock(&node->mutex);
83+
cond_signal(&node->cond, &node->mutex);
84+
}
85+
86+
/*
 * Worker thread body; 'ptr' is the struct node owned by this thread.
 *
 * Iteration i first waits for the clock to reach tick i and, if the node has
 * a parent, for the parent to become ready. It then alternates between two
 * actions, starting with a clock tick: on even-numbered passes (bit false)
 * it ticks the clock, on odd-numbered passes (bit true) it signals itself
 * ready. The loop ends when clock_wait() returns false, i.e. once the clock
 * has been stopped.
 *
 * Always returns NULL.
 */
static void *thread_func(void *ptr)
87+
{
88+
struct node *self = ptr;
89+
bool bit = false;
90+
91+
for (int i = 1; clock_wait(self->clock, i); ++i) {
92+
if (self->parent)
93+
node_wait(self->parent);
94+
95+
if (bit) {
96+
node_signal(self);
97+
} else {
98+
clock_tick(self->clock);
99+
}
100+
bit = !bit;
101+
}
102+
103+
/* One last signal after the clock has stopped. */
node_signal(self);
104+
return NULL;
105+
}
106+
107+
/*
 * Test driver: builds a chain of N_NODES nodes sharing one clock, runs one
 * thread per node, and waits for the clock to reach 1 << N_NODES ticks
 * before stopping it and joining the threads.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE as soon as a pthread_create() or
 * pthread_join() call fails (no cleanup is attempted in that case).
 */
int main(void)
108+
{
109+
struct clock clock;
110+
clock_init(&clock);
111+
112+
#define N_NODES 16
113+
struct node nodes[N_NODES];
114+
/* nodes[0] has no parent; every other node's parent is its predecessor. */
node_init(&clock, NULL, &nodes[0]);
115+
for (int i = 1; i < N_NODES; ++i)
116+
node_init(&clock, &nodes[i - 1], &nodes[i]);
117+
118+
pthread_t threads[N_NODES];
119+
for (int i = 0; i < N_NODES; ++i) {
120+
if (pthread_create(&threads[i], NULL, thread_func, &nodes[i]) != 0)
121+
return EXIT_FAILURE;
122+
}
123+
124+
/* First tick releases the workers, which wait for tick 1. */
clock_tick(&clock);
125+
clock_wait(&clock, 1 << N_NODES);
126+
/* Stopping the clock makes every worker's clock_wait() return false. */
clock_stop(&clock);
127+
128+
for (int i = 0; i < N_NODES; ++i) {
129+
if (pthread_join(threads[i], NULL) != 0)
130+
return EXIT_FAILURE;
131+
}
132+
133+
return EXIT_SUCCESS;
134+
}

‎mutex/futex.h

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#pragma once
2+
3+
#if USE_LINUX
4+
5+
#include <limits.h>
6+
#include <linux/futex.h>
7+
#include <sys/syscall.h>
8+
#include <unistd.h>
9+
10+
/*
 * Atomically check if '*futex == value', and if so, go to sleep.
 *
 * Issues FUTEX_WAIT_PRIVATE with a NULL timeout. The syscall's return value
 * is discarded, so the caller cannot tell why the call returned and must
 * re-check its own condition.
 *
 * NOTE(review): 'atomic' is the macro from atomic.h, which this header does
 * not include itself; confirm it is visible on every include path.
 */
11+
static inline void futex_wait(atomic int *futex, int value)
12+
{
13+
syscall(SYS_futex, futex, FUTEX_WAIT_PRIVATE, value, NULL);
14+
}
15+
16+
/*
 * Wake up 'limit' threads currently waiting on 'futex'.
 *
 * Issues FUTEX_WAKE_PRIVATE; the syscall's return value is discarded.
 */
17+
static inline void futex_wake(atomic int *futex, int limit)
18+
{
19+
syscall(SYS_futex, futex, FUTEX_WAKE_PRIVATE, limit);
20+
}
21+
22+
/*
 * Wake up 'limit' waiters, and re-queue the rest onto a different futex.
 *
 * Issues FUTEX_REQUEUE_PRIVATE on 'futex' with 'limit' as the wake count,
 * INT_MAX as the requeue count and 'other' as the destination. The syscall's
 * return value is discarded.
 */
23+
static inline void futex_requeue(atomic int *futex,
24+
int limit,
25+
atomic int *other)
26+
{
27+
syscall(SYS_futex, futex, FUTEX_REQUEUE_PRIVATE, limit, INT_MAX, other);
28+
}
29+
30+
/*
 * Fallback definitions used when <linux/futex.h> does not already provide
 * FUTEX_LOCK_PI2_PRIVATE.
 */
#ifndef FUTEX_LOCK_PI2_PRIVATE
31+
#define FUTEX_LOCK_PI2 13
32+
#define FUTEX_LOCK_PI2_PRIVATE (FUTEX_LOCK_PI2 | FUTEX_PRIVATE_FLAG)
33+
#endif
34+
35+
/*
 * Issue FUTEX_LOCK_PI2_PRIVATE on 'futex', passing 'timeout' straight
 * through to the kernel. The syscall's return value is discarded.
 *
 * NOTE(review): none of this header's own includes is <time.h>; confirm
 * that struct timespec is declared on every include path.
 */
static inline void futex_lock_pi(atomic int *futex, struct timespec *timeout)
36+
{
37+
/* Note: val is ignored for FUTEX_LOCK_PI, just fill a dummy value. */
38+
int val = 0;
39+
syscall(SYS_futex, futex, FUTEX_LOCK_PI2_PRIVATE, val, timeout);
40+
}
41+
42+
/*
 * Issue FUTEX_UNLOCK_PI_PRIVATE on 'futex'. The syscall's return value is
 * discarded.
 */
static inline void futex_unlock_pi(atomic int *futex)
43+
{
44+
syscall(SYS_futex, futex, FUTEX_UNLOCK_PI_PRIVATE);
45+
}
46+
#endif

0 commit comments

Comments
 (0)
Please sign in to comment.