Skip to content

Refactor and consolidate all SIMD handlers #3352

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 110 additions & 111 deletions src_c/alphablit.c

Large diffs are not rendered by default.

29 changes: 5 additions & 24 deletions src_c/simd_blitters.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
#define NO_PYGAME_C_API
#include "_surface.h"
#include "_blit_info.h"
#include "simd_shared.h"

#if !defined(PG_ENABLE_ARM_NEON) && defined(__aarch64__)
// arm64 has neon optimisations enabled by default, even when fpu=neon is not
// passed
#define PG_ENABLE_ARM_NEON 1
#endif

#if (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON))
#ifdef PG_HAS_SSE2_OR_NEON
void
alphablit_alpha_sse2_argb_surf_alpha(SDL_BlitInfo *info);
void
Expand Down Expand Up @@ -37,26 +32,11 @@ void
blit_blend_rgb_min_sse2(SDL_BlitInfo *info);
void
blit_blend_premultiplied_sse2(SDL_BlitInfo *info);
#endif /* (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)) */

/* Deliberately putting these outside of the preprocessor guards as I want to
move to a system of trusting the runtime checks to head to the right
function and having a fallback function there if pygame is not compiled
with the right stuff (this is the strategy used for AVX2 right now.
Potentially I might want to shift both these into a slightly different
file as they are not exactly blits (though v. similar) - or I could rename
the SIMD trilogy of files to replace the word blit with something more
generic like surface_ops*/

void
premul_surf_color_by_alpha_non_simd(SDL_Surface *src,
PG_PixelFormat *src_format,
SDL_Palette *src_palette, SDL_Surface *dst,
PG_PixelFormat *dst_format,
SDL_Palette *dst_palette);
void
premul_surf_color_by_alpha_sse2(SDL_Surface *src, SDL_Surface *dst);
#endif /* PG_HAS_SSE2_OR_NEON */

#ifdef PG_HAS_AVX2
void
alphablit_alpha_avx2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info);
void
Expand Down Expand Up @@ -87,3 +67,4 @@ void
blit_blend_premultiplied_avx2(SDL_BlitInfo *info);
void
premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst);
#endif
195 changes: 4 additions & 191 deletions src_c/simd_blitters_avx2.c
Original file line number Diff line number Diff line change
@@ -1,44 +1,16 @@
#include "simd_blitters.h"

#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
#include <immintrin.h>
#endif /* defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H) */

#define BAD_AVX2_FUNCTION_CALL \
printf( \
"Fatal Error: Attempted calling an AVX2 function when both compile " \
"time and runtime support is missing. If you are seeing this " \
"message, you have stumbled across a pygame bug, please report it " \
"to the devs!"); \
PG_EXIT(1)

/* helper function that does a runtime check for AVX2. It has the added
* functionality of also returning 0 if compile time support is missing */
int
pg_has_avx2()
{
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
return SDL_HasAVX2();
#else
return 0;
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */
}

/* This returns 1 when avx2 is available at runtime but support for it isn't
* compiled in, 0 in all other cases */
int
pg_avx2_at_runtime_but_uncompiled()
{
if (SDL_HasAVX2()) {
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
#ifdef PG_HAS_AVX2
return 0;
#else
return 1;
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */
#endif /* PG_HAS_AVX2 */
}
return 0;
}
Expand Down Expand Up @@ -190,8 +162,7 @@ pg_avx2_at_runtime_but_uncompiled()
_mm256_srli_epi16( \
_mm256_mulhi_epu16(MM256I, _mm256_set1_epi16((short)0x8081)), 7);

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
#ifdef PG_HAS_AVX2
void
alphablit_alpha_avx2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -258,17 +229,7 @@ alphablit_alpha_avx2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
* surfaces. */
pixels_dst = _mm256_and_si256(pixels_dst, mask_out_alpha);)
}
#else
void
alphablit_alpha_avx2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
alphablit_alpha_avx2_argb_no_surf_alpha(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -324,17 +285,7 @@ alphablit_alpha_avx2_argb_no_surf_alpha(SDL_BlitInfo *info)
shuff_dst =
_mm256_blendv_epi8(shuff_dst, new_dst_alpha, combine_rgba_mask);))
}
#else
void
alphablit_alpha_avx2_argb_no_surf_alpha(SDL_BlitInfo *info)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
alphablit_alpha_avx2_argb_surf_alpha(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -406,17 +357,6 @@ alphablit_alpha_avx2_argb_surf_alpha(SDL_BlitInfo *info)
shuff_dst =
_mm256_blendv_epi8(shuff_dst, new_dst_alpha, combine_rgba_mask);))
}
#else
void
alphablit_alpha_avx2_argb_surf_alpha(SDL_BlitInfo *info)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
blit_blend_rgba_mul_avx2(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -524,17 +464,6 @@ blit_blend_rgba_mul_avx2(SDL_BlitInfo *info)
dstp = (Uint32 *)dstp256 + dstskip;
}
}
#else
void
blit_blend_rgba_mul_avx2(SDL_BlitInfo *info)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
blit_blend_rgb_mul_avx2(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -653,17 +582,6 @@ blit_blend_rgb_mul_avx2(SDL_BlitInfo *info)
dstp = (Uint32 *)dstp256 + dstskip;
}
}
#else
void
blit_blend_rgb_mul_avx2(SDL_BlitInfo *info)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
blit_blend_rgba_add_avx2(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -725,17 +643,6 @@ blit_blend_rgba_add_avx2(SDL_BlitInfo *info)
dstp = (Uint32 *)dstp256 + dstskip;
}
}
#else
void
blit_blend_rgba_add_avx2(SDL_BlitInfo *info)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
blit_blend_rgb_add_avx2(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -805,17 +712,6 @@ blit_blend_rgb_add_avx2(SDL_BlitInfo *info)
dstp = (Uint32 *)dstp256 + dstskip;
}
}
#else
void
blit_blend_rgb_add_avx2(SDL_BlitInfo *info)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
blit_blend_rgba_sub_avx2(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -877,17 +773,6 @@ blit_blend_rgba_sub_avx2(SDL_BlitInfo *info)
dstp = (Uint32 *)dstp256 + dstskip;
}
}
#else
void
blit_blend_rgba_sub_avx2(SDL_BlitInfo *info)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
blit_blend_rgb_sub_avx2(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -957,17 +842,6 @@ blit_blend_rgb_sub_avx2(SDL_BlitInfo *info)
dstp = (Uint32 *)dstp256 + dstskip;
}
}
#else
void
blit_blend_rgb_sub_avx2(SDL_BlitInfo *info)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
blit_blend_rgba_max_avx2(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -1029,17 +903,6 @@ blit_blend_rgba_max_avx2(SDL_BlitInfo *info)
dstp = (Uint32 *)dstp256 + dstskip;
}
}
#else
void
blit_blend_rgba_max_avx2(SDL_BlitInfo *info)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
blit_blend_rgb_max_avx2(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -1109,17 +972,6 @@ blit_blend_rgb_max_avx2(SDL_BlitInfo *info)
dstp = (Uint32 *)dstp256 + dstskip;
}
}
#else
void
blit_blend_rgb_max_avx2(SDL_BlitInfo *info)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
blit_blend_rgba_min_avx2(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -1181,17 +1033,6 @@ blit_blend_rgba_min_avx2(SDL_BlitInfo *info)
dstp = (Uint32 *)dstp256 + dstskip;
}
}
#else
void
blit_blend_rgba_min_avx2(SDL_BlitInfo *info)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
blit_blend_rgb_min_avx2(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -1261,17 +1102,6 @@ blit_blend_rgb_min_avx2(SDL_BlitInfo *info)
dstp = (Uint32 *)dstp256 + dstskip;
}
}
#else
void
blit_blend_rgb_min_avx2(SDL_BlitInfo *info)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
blit_blend_premultiplied_avx2(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -1521,14 +1351,6 @@ blit_blend_premultiplied_avx2(SDL_BlitInfo *info)
dstp = (Uint32 *)dstp256 + dstskip;
}
}
#else
void
blit_blend_premultiplied_avx2(SDL_BlitInfo *info)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */

#define PREMUL_ALPHA_CODE \
/* extract the alpha */ \
Expand Down Expand Up @@ -1558,8 +1380,6 @@ blit_blend_premultiplied_avx2(SDL_BlitInfo *info)
/*add the original alpha back in*/ \
mm_dst = _mm256_or_si256(mm_dst, mm_alpha_in);

#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst)
{
Expand Down Expand Up @@ -1635,11 +1455,4 @@ premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst)
dstp += dst_skip;
}
}
#else
void
premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst)
{
BAD_AVX2_FUNCTION_CALL;
}
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H) */
#endif /* PG_HAS_AVX2 */
Loading
Loading