From 481056c21a2b9108560650fd480fb6efddb58e38 Mon Sep 17 00:00:00 2001 From: Jerome Kieffer Date: Tue, 13 Sep 2022 09:43:07 +0200 Subject: [PATCH 1/2] fix GPU code --- saxstats/saxstats.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/saxstats/saxstats.py b/saxstats/saxstats.py index c388a11..f339307 100755 --- a/saxstats/saxstats.py +++ b/saxstats/saxstats.py @@ -131,9 +131,13 @@ def abs2(x): def mybinmean(xravel,binsravel,xcount=None,DENSS_GPU=False): if DENSS_GPU: + if isinstance(binsravel, np.ndarray): + binsravel = cp.asarray(binsravel) xsum = cp.bincount(binsravel, xravel) if xcount is None: xcount = cp.bincount(binsravel) + elif isinstance(xcount, np.ndarray): + xcount = cp.asarray(xcount) return xsum/xcount else: xsum = np.bincount(binsravel, xravel) @@ -1230,6 +1234,8 @@ def denss(q, I, sigq, dmax, ne=None, voxel=5., oversampling=3., recenter=True, r #Error Reduction newrho *= 0 + if DENSS_GPU: + newrho = cp.asarray(newrho) newrho[support] = rhoprime[support] # enforce positivity by making all negative density points zero. From 8b9ec14efb424f6ca95a3ac6324ea0c05c96e64b Mon Sep 17 00:00:00 2001 From: Jerome Kieffer Date: Wed, 14 Sep 2022 13:13:12 +0200 Subject: [PATCH 2/2] performance optimization of the function abs2 on GPU --- saxstats/saxstats.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/saxstats/saxstats.py b/saxstats/saxstats.py index f339307..be2308e 100755 --- a/saxstats/saxstats.py +++ b/saxstats/saxstats.py @@ -83,7 +83,6 @@ #it works, but often results in nans randomly PYFFTW = False - def myfftn(x, DENSS_GPU=False): if DENSS_GPU: return cp.fft.fftn(x) @@ -118,10 +117,16 @@ def myabs(x, out=None,DENSS_GPU=False): else: return np.abs(x,out=out) -# @numba.vectorize([numba.float64(numba.complex128),numba.float32(numba.complex64)]) -def abs2(x): +if CUPY_LOADED: + gpu_abs2 = cp.ElementwiseKernel( + 'complex128 z', 'float64 d', 'double re=real(z), im=imag(z); d=re*re+im*im;', "gpu_abs2") + +def abs2(x, DENSS_GPU=False): #a faster way to calculate abs(x)**2, for calculating intensities - return x.real**2 + x.imag**2 + if DENSS_GPU: + return gpu_abs2(x) + else: + return x.real**2 + x.imag**2 # @numba.jit(nopython=True) # def mybincount(x, weights): @@ -1212,7 +1217,7 @@ def denss(q, I, sigq, dmax, ne=None, voxel=5., oversampling=3., recenter=True, r #APPLY RECIPROCAL SPACE RESTRAINTS #calculate spherical average of intensities from 3D Fs # I3D = myabs(F, out=I3D, DENSS_GPU=DENSS_GPU)**2 - I3D = abs2(F) + I3D = abs2(F, DENSS_GPU=DENSS_GPU) Imean = mybinmean(I3D.ravel(), qblravel, xcount=xcount, DENSS_GPU=DENSS_GPU) #scale Fs to match data