From 8b1fb38e5474433aad49126cedfe289063dcc17f Mon Sep 17 00:00:00 2001 From: Andrew Coffey Date: Sun, 23 Feb 2025 22:16:10 -0600 Subject: [PATCH 1/2] Freethreading surface early work --- examples/multithreaded_surface.py | 70 +++++++++++++++++++++++++++++++ pyproject.toml | 2 +- src_c/include/_pygame.h | 29 +++++++++++++ src_c/surface.c | 50 ++++++++++++++++++---- src_c/surflock.c | 10 +++++ 5 files changed, 153 insertions(+), 8 deletions(-) create mode 100644 examples/multithreaded_surface.py diff --git a/examples/multithreaded_surface.py b/examples/multithreaded_surface.py new file mode 100644 index 0000000000..995b7c51b2 --- /dev/null +++ b/examples/multithreaded_surface.py @@ -0,0 +1,70 @@ +import pygame +import threading +import random + +from time import perf_counter + +start = perf_counter() + +pygame.init() + +WIDTH, HEIGHT = 100, 100 +NUM_THREADS = 10 +surface = pygame.Surface((WIDTH, HEIGHT)) +surface.fill("black") + +LERP_OFFSET = 0.001 + + +def get_random_color() -> pygame.Color: + r = random.randint(0, 255) + g = random.randint(0, 255) + b = random.randint(0, 255) + a = 255 + return pygame.Color(r, g, b, a) + + +def multithreaded_func( + surf: pygame.Surface, target_pixel: tuple[int, int], target_color: pygame.Color +) -> None: + lerp_distance = 0 + + original_color = surf.get_at(target_pixel) + + while (surf.get_at(target_pixel) != target_color) and lerp_distance < 1: + lerp_distance += LERP_OFFSET + new_color = original_color.lerp(target_color, lerp_distance) + surf.set_at(target_pixel, new_color) + + +pixels = [(col, row) for col in range(WIDTH) for row in range(HEIGHT)] + +colors = [get_random_color() for _ in range(WIDTH * HEIGHT)] + +args = [(pixel, colors[i]) for i, pixel in enumerate(pixels)] +batches = { + i: args[i * NUM_THREADS : (i + 1) * NUM_THREADS] + for i in range(WIDTH * HEIGHT // NUM_THREADS) +} + +for batch in batches.values(): + threads: list[threading.Thread] = [] + for pixel, color in batch: + new_thread = threading.Thread( + target=multithreaded_func, args=(surface, pixel, color) + ) + new_thread.start() + threads.append(new_thread) + + while any([t.is_alive() for t in threads]): + continue + +pygame.image.save(pygame.transform.scale_by(surface, 10), "out.png") + +end = perf_counter() + +print(f"time taken: {end - start}") + +for pixel, color in zip(pixels, colors): + surface.set_at(pixel, color) +pygame.image.save(pygame.transform.scale_by(surface, 10), "comparison.png") diff --git a/pyproject.toml b/pyproject.toml index 2489fb9057..8982f59ede 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ requires = [ "meson-python<=0.17.1", "meson<=1.7.0", "ninja<=1.12.1", - "cython<=3.0.11", + "cython<=3.1.0a1", "sphinx<=8.1.3", "sphinx-autoapi<=3.3.2", ] diff --git a/src_c/include/_pygame.h b/src_c/include/_pygame.h index 5ae7f13ab2..ec333d12cc 100644 --- a/src_c/include/_pygame.h +++ b/src_c/include/_pygame.h @@ -320,9 +320,26 @@ typedef struct { PyObject *weakreflist; PyObject *locklist; PyObject *dependency; +#if PY_VERSION_HEX >= 0x030D0000 && Py_GIL_DISABLED + PyMutex mutex; +#endif } pgSurfaceObject; #define pgSurface_AsSurface(x) (((pgSurfaceObject *)x)->surf) +#if PY_VERSION_HEX >= 0x030D0000 && Py_GIL_DISABLED +#define LOCK_pgSurfaceObject(pgSurfacePtr) \ + PyMutex_Lock(&(((pgSurfaceObject *)pgSurfacePtr)->mutex)); +#else +#define LOCK_pgSurfaceObject(pgSurfacePtr) +#endif + +#if PY_VERSION_HEX >= 0x030D0000 && Py_GIL_DISABLED +#define UNLOCK_pgSurfaceObject(pgSurfacePtr) \ + PyMutex_Unlock(&(((pgSurfaceObject *)pgSurfacePtr)->mutex)); +#else +#define UNLOCK_pgSurfaceObject(pgSurfacePtr) +#endif + #ifndef PYGAMEAPI_SURFACE_INTERNAL #define pgSurface_Type (*(PyTypeObject *)PYGAMEAPI_GET_SLOT(surface, 0)) @@ -617,6 +634,18 @@ PYGAMEAPI_EXTERN_SLOTS(geometry); } \ } +#if Py_GIL_DISABLED +#define DISABLE_GIL_SINGLE_INITIALIZATION(module, name) \ + if (PyUnstable_Module_SetGIL(module, Py_MOD_GIL_NOT_USED) < 0) { \ + Py_DECREF(module); \ + return NULL; \ + } \ + printf("%s was compiled with GIL disabled\n", name); +#else +#define DISABLE_GIL_SINGLE_INITIALIZATION(module, name) \ + printf("%s was compiled with GIL enabled\n", name); +#endif + static PG_INLINE PyObject * pg_tuple_couple_from_values_int(int val1, int val2) { diff --git a/src_c/surface.c b/src_c/surface.c index 9424452359..cc45fbd1e9 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -392,6 +392,9 @@ surface_new(PyTypeObject *type, PyObject *args, PyObject *kwds) self->weakreflist = NULL; self->dependency = NULL; self->locklist = NULL; +#if PY_VERSION_HEX >= 0x030D0000 && Py_GIL_DISABLED + memset(&(self->mutex), 0, sizeof(PyMutex)); +#endif } return (PyObject *)self; } @@ -725,8 +728,9 @@ surf_get_at(PyObject *self, PyObject *position) "position must be a sequence of two numbers"); } - if (x < 0 || x >= surf->w || y < 0 || y >= surf->h) + if (x < 0 || x >= surf->w || y < 0 || y >= surf->h) { return RAISE(PyExc_IndexError, "pixel index out of range"); + } PG_PixelFormat *format; SDL_Palette *palette; @@ -735,11 +739,18 @@ surf_get_at(PyObject *self, PyObject *position) } int bpp = PG_FORMAT_BytesPerPixel(format); - if (bpp < 1 || bpp > 4) + if (bpp < 1 || bpp > 4) { return RAISE(PyExc_RuntimeError, "invalid color depth for surface"); + } - if (!pgSurface_Lock((pgSurfaceObject *)self)) + if (!pgSurface_Lock((pgSurfaceObject *)self)) { + if (PyErr_Occurred()) { + PyErr_Print(); + PyErr_Clear(); + } + PyErr_SetString(pgExc_SDLError, "Failed to lock surface"); return NULL; + } pixels = (Uint8 *)surf->pixels; @@ -769,8 +780,14 @@ surf_get_at(PyObject *self, PyObject *position) rgba + 3); break; } - if (!pgSurface_Unlock((pgSurfaceObject *)self)) + if (!pgSurface_Unlock((pgSurfaceObject *)self)) { + if (PyErr_Occurred()) { + PyErr_Print(); + PyErr_Clear(); + } + PyErr_SetString(pgExc_SDLError, "Failed to unlock surface in get_at"); return NULL; + } return pgColor_New(rgba); } @@ -805,8 +822,9 @@ surf_set_at(PyObject *self, PyObject *const *args, Py_ssize_t nargs) } int bpp = PG_FORMAT_BytesPerPixel(format); - if (bpp < 1 || bpp > 4) + if (bpp < 1 || bpp > 4) { return RAISE(PyExc_RuntimeError, "invalid color depth for surface"); + } SDL_Rect clip_rect; if (!PG_GetSurfaceClipRect(surf, &clip_rect)) { @@ -816,6 +834,7 @@ surf_set_at(PyObject *self, PyObject *const *args, Py_ssize_t nargs) if (x < clip_rect.x || x >= clip_rect.x + clip_rect.w || y < clip_rect.y || y >= clip_rect.y + clip_rect.h) { /* out of clip area */ + Py_RETURN_NONE; } @@ -823,8 +842,15 @@ surf_set_at(PyObject *self, PyObject *const *args, Py_ssize_t nargs) return NULL; } - if (!pgSurface_Lock((pgSurfaceObject *)self)) + if (!pgSurface_Lock((pgSurfaceObject *)self)) { + if (PyErr_Occurred()) { + PyErr_Print(); + PyErr_Clear(); + } + PyErr_SetString(pgExc_SDLError, "Failed to lock surface"); + return NULL; + } pixels = (Uint8 *)surf->pixels; switch (bpp) { @@ -859,8 +885,15 @@ surf_set_at(PyObject *self, PyObject *const *args, Py_ssize_t nargs) break; } - if (!pgSurface_Unlock((pgSurfaceObject *)self)) + if (!pgSurface_Unlock((pgSurfaceObject *)self)) { + if (PyErr_Occurred()) { + PyErr_Print(); + PyErr_Clear(); + } + PyErr_SetString(pgExc_SDLError, "Failed to unlock surface in set_at"); + return NULL; + } Py_RETURN_NONE; } @@ -4223,6 +4256,9 @@ MODINIT_DEFINE(surface) if (module == NULL) { return NULL; } + + DISABLE_GIL_SINGLE_INITIALIZATION(module, _module.m_name) + if (pg_warn_simd_at_runtime_but_uncompiled() < 0) { Py_DECREF(module); return NULL; diff --git a/src_c/surflock.c b/src_c/surflock.c index 0786d0fe66..57960deb10 100644 --- a/src_c/surflock.c +++ b/src_c/surflock.c @@ -76,22 +76,28 @@ pgSurface_LockBy(pgSurfaceObject *surfobj, PyObject *lockobj) PyObject *ref; pgSurfaceObject *surf = (pgSurfaceObject *)surfobj; + LOCK_pgSurfaceObject(surfobj); + if (surf->locklist == NULL) { surf->locklist = PyList_New(0); if (surf->locklist == NULL) { + UNLOCK_pgSurfaceObject(surfobj); return 0; } } ref = PyWeakref_NewRef(lockobj, NULL); if (ref == NULL) { + UNLOCK_pgSurfaceObject(surfobj); return 0; } if (ref == Py_None) { Py_DECREF(ref); + UNLOCK_pgSurfaceObject(surfobj); return 0; } if (0 != PyList_Append(surf->locklist, ref)) { Py_DECREF(ref); + UNLOCK_pgSurfaceObject(surfobj); return 0; /* Exception already set. */ } Py_DECREF(ref); @@ -106,8 +112,10 @@ pgSurface_LockBy(pgSurfaceObject *surfobj, PyObject *lockobj) #endif { PyErr_SetString(PyExc_RuntimeError, "error locking surface"); + UNLOCK_pgSurfaceObject(surfobj); return 0; } + return 1; } @@ -167,6 +175,7 @@ pgSurface_UnlockBy(pgSurfaceObject *surfobj, PyObject *lockobj) } if (!found) { + UNLOCK_pgSurfaceObject(surfobj); return noerror; } @@ -181,6 +190,7 @@ pgSurface_UnlockBy(pgSurfaceObject *surfobj, PyObject *lockobj) found--; } + UNLOCK_pgSurfaceObject(surfobj); return noerror; } From 0c7e3d50afa205bae10545e214f4a114ee87c738 Mon Sep 17 00:00:00 2001 From: Andrew Coffey Date: Sun, 2 Mar 2025 08:24:02 -0600 Subject: [PATCH 2/2] surface.convert now works in threads --- examples/multithreaded_surface.py | 30 ++++++++++++++------- src_c/_pygame.h | 1 + src_c/include/_pygame.h | 43 ++++++++++++++++++++++++++++--- src_c/surface.c | 14 +++++++--- 4 files changed, 70 insertions(+), 18 deletions(-) diff --git a/examples/multithreaded_surface.py b/examples/multithreaded_surface.py index 995b7c51b2..1b058e0955 100644 --- a/examples/multithreaded_surface.py +++ b/examples/multithreaded_surface.py @@ -1,10 +1,11 @@ import pygame import threading import random +import sys from time import perf_counter -start = perf_counter() +print(f"GIL Enabled: {sys._is_gil_enabled()}") pygame.init() @@ -13,7 +14,7 @@ surface = pygame.Surface((WIDTH, HEIGHT)) surface.fill("black") -LERP_OFFSET = 0.001 +LERP_OFFSET = 0.01 def get_random_color() -> pygame.Color: @@ -24,8 +25,15 @@ def get_random_color() -> pygame.Color: return pygame.Color(r, g, b, a) +def get_random_format() -> int: + return random.choice([8, 12, 15, 16, 24, 32]) + + def multithreaded_func( - surf: pygame.Surface, target_pixel: tuple[int, int], target_color: pygame.Color + surf: pygame.Surface, + target_pixel: tuple[int, int], + target_color: pygame.Color, + depth: int, ) -> None: lerp_distance = 0 @@ -35,34 +43,36 @@ def multithreaded_func( lerp_distance += LERP_OFFSET new_color = original_color.lerp(target_color, lerp_distance) surf.set_at(target_pixel, new_color) + surf.convert(depth) pixels = [(col, row) for col in range(WIDTH) for row in range(HEIGHT)] colors = [get_random_color() for _ in range(WIDTH * HEIGHT)] -args = [(pixel, colors[i]) for i, pixel in enumerate(pixels)] +depths = [get_random_format() for _ in range(WIDTH * HEIGHT)] + +args = [(pixel, colors[i], depths[i]) for i, pixel in enumerate(pixels)] batches = { i: args[i * NUM_THREADS : (i + 1) * NUM_THREADS] for i in range(WIDTH * HEIGHT // NUM_THREADS) } +start = perf_counter() for batch in batches.values(): threads: list[threading.Thread] = [] - for pixel, color in batch: - new_thread = threading.Thread( - target=multithreaded_func, args=(surface, pixel, color) - ) + for arg in batch: + new_thread = threading.Thread(target=multithreaded_func, args=(surface, *arg)) new_thread.start() threads.append(new_thread) while any([t.is_alive() for t in threads]): continue -pygame.image.save(pygame.transform.scale_by(surface, 10), "out.png") - end = perf_counter() +pygame.image.save(pygame.transform.scale_by(surface, 10), "out.png") + print(f"time taken: {end - start}") for pixel, color in zip(pixels, colors): diff --git a/src_c/_pygame.h b/src_c/_pygame.h index 3bba7ed079..8ebc25c5ac 100644 --- a/src_c/_pygame.h +++ b/src_c/_pygame.h @@ -179,6 +179,7 @@ PG_GetSurfaceFormat(SDL_Surface *surf) #define PG_CreateSurfaceFrom(width, height, format, pixels, pitch) \ SDL_CreateRGBSurfaceWithFormatFrom(pixels, width, height, 0, pitch, format) #define PG_ConvertSurface(src, fmt) SDL_ConvertSurface(src, fmt, 0) + #define PG_ConvertSurfaceFormat(src, pixel_format) \ SDL_ConvertSurfaceFormat(src, pixel_format, 0) diff --git a/src_c/include/_pygame.h b/src_c/include/_pygame.h index 5d5915bff8..911db47265 100644 --- a/src_c/include/_pygame.h +++ b/src_c/include/_pygame.h @@ -322,20 +322,55 @@ typedef struct { PyObject *dependency; #if PY_VERSION_HEX >= 0x030D0000 && Py_GIL_DISABLED PyMutex mutex; + unsigned int num_locks; + uint64_t locking_thread_id; #endif } pgSurfaceObject; #define pgSurface_AsSurface(x) (((pgSurfaceObject *)x)->surf) #if PY_VERSION_HEX >= 0x030D0000 && Py_GIL_DISABLED -#define LOCK_pgSurfaceObject(pgSurfacePtr) \ - PyMutex_Lock(&(((pgSurfaceObject *)pgSurfacePtr)->mutex)); +#define LOCK_pgSurfaceObject(pgSurfacePtr) \ + { \ + pgSurfaceObject *surfObj = (pgSurfaceObject *)pgSurfacePtr; \ + uint64_t thread_id = PyThreadState_Get()->id; \ + if (surfObj->num_locks) { \ + if (thread_id == surfObj->locking_thread_id) { \ + ++(surfObj->num_locks); \ + } \ + else { \ + PyMutex_Lock(&surfObj->mutex); \ + surfObj->num_locks = 1U; \ + surfObj->locking_thread_id = thread_id; \ + } \ + } \ + else { \ + PyMutex_Lock(&surfObj->mutex); \ + surfObj->num_locks = 1U; \ + surfObj->locking_thread_id = thread_id; \ + } \ + } #else #define LOCK_pgSurfaceObject(pgSurfacePtr) #endif #if PY_VERSION_HEX >= 0x030D0000 && Py_GIL_DISABLED -#define UNLOCK_pgSurfaceObject(pgSurfacePtr) \ - PyMutex_Unlock(&(((pgSurfaceObject *)pgSurfacePtr)->mutex)); +#define UNLOCK_pgSurfaceObject(pgSurfacePtr) \ + { \ + pgSurfaceObject *surfObj = (pgSurfaceObject *)pgSurfacePtr; \ + uint64_t thread_id = PyThreadState_Get()->id; \ + if (surfObj->num_locks) { \ + if (thread_id == surfObj->locking_thread_id) { \ + if (surfObj->num_locks > 1U) { \ + --(surfObj->num_locks); \ + } \ + else { \ + surfObj->locking_thread_id = 0U; \ + surfObj->num_locks = 0U; \ + PyMutex_Unlock(&surfObj->mutex); \ + } \ + } \ + } \ + } #else #define UNLOCK_pgSurfaceObject(pgSurfacePtr) #endif diff --git a/src_c/surface.c b/src_c/surface.c index cc45fbd1e9..04a30a3110 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -1471,7 +1471,7 @@ surf_convert(pgSurfaceObject *self, PyObject *args) SURF_INIT_CHECK(surf) - pgSurface_Prep(self); + pgSurface_Lock(self); if ((has_colorkey = SDL_HasColorKey(surf))) { SDL_GetColorKey(surf, &colorkey); @@ -1513,6 +1513,7 @@ surf_convert(pgSurfaceObject *self, PyObject *args) Amask = 0xFF << 24; break; default: + pgSurface_Unlock(self); return RAISE(PyExc_ValueError, "no standard masks exist for given " "bitdepth with alpha"); @@ -1548,6 +1549,7 @@ surf_convert(pgSurfaceObject *self, PyObject *args) Bmask = 0xFF; break; default: + pgSurface_Unlock(self); return RAISE(PyExc_ValueError, "nonstandard bit depth given"); } @@ -1565,7 +1567,7 @@ surf_convert(pgSurfaceObject *self, PyObject *args) !pg_UintFromObjIndex(argobject, 1, &format.Gmask) || !pg_UintFromObjIndex(argobject, 2, &format.Bmask) || !pg_UintFromObjIndex(argobject, 3, &format.Amask)) { - pgSurface_Unprep(self); + pgSurface_Unlock(self); return RAISE(PyExc_ValueError, "invalid color masks given"); } @@ -1576,7 +1578,7 @@ surf_convert(pgSurfaceObject *self, PyObject *args) break; } else { - pgSurface_Unprep(self); + pgSurface_Unlock(self); return RAISE( PyExc_ValueError, "invalid argument specifying new format to convert to"); @@ -1611,7 +1613,9 @@ surf_convert(pgSurfaceObject *self, PyObject *args) SDL_SetPixelFormatPalette(&format, palette); } } + // pgSurface_Lock(self); newsurf = PG_ConvertSurface(surf, &format); + // pgSurface_Unlock(self); SDL_SetSurfaceBlendMode(newsurf, SDL_BLENDMODE_NONE); SDL_FreePalette(palette); } @@ -1623,6 +1627,7 @@ surf_convert(pgSurfaceObject *self, PyObject *args) } if (newsurf == NULL) { + pgSurface_Unlock(self); return RAISE(pgExc_SDLError, SDL_GetError()); } @@ -1630,12 +1635,13 @@ surf_convert(pgSurfaceObject *self, PyObject *args) colorkey = SDL_MapRGBA(newsurf->format, key_r, key_g, key_b, key_a); if (SDL_SetColorKey(newsurf, SDL_TRUE, colorkey) != 0) { PyErr_SetString(pgExc_SDLError, SDL_GetError()); + pgSurface_Unlock(self); SDL_FreeSurface(newsurf); return NULL; } } - pgSurface_Unprep(self); + pgSurface_Unlock(self); final = surf_subtype_new(Py_TYPE(self), newsurf, 1); if (!final)