From 2fbcb047a8c294fb602fba2cff2b5d4425f9c527 Mon Sep 17 00:00:00 2001 From: PONS Date: Mon, 18 Aug 2025 13:25:07 +0200 Subject: [PATCH 01/13] Added kwargs to get_acceptance() calls to allow specific GPU parameters to be passed --- pyat/at/acceptance/acceptance.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyat/at/acceptance/acceptance.py b/pyat/at/acceptance/acceptance.py index 773b6a255..25f228bc6 100644 --- a/pyat/at/acceptance/acceptance.py +++ b/pyat/at/acceptance/acceptance.py @@ -41,6 +41,7 @@ def get_acceptance( divider: int = 2, shift_zero: float = 1.0e-6, start_method: str | None = None, + **kwargs ): # noinspection PyUnresolvedReferences r"""Computes the acceptance at ``repfts`` observation points @@ -116,7 +117,6 @@ def get_acceptance( do most of the work in a single function call and allows for full parallelization. """ - kwargs = {} if start_method is not None: kwargs["start_method"] = start_method @@ -198,6 +198,7 @@ def get_1d_acceptance( divider: int | None = 2, shift_zero: float | None = 1.0e-6, start_method: str | None = None, + **kwargs ): r"""Computes the 1D acceptance at ``refpts`` observation points @@ -289,6 +290,7 @@ def get_1d_acceptance( divider=divider, shift_zero=shift_zero, offset=offset, + **kwargs ) return np.squeeze(b), s, g From 97c60f3e49318bd73bab97b9f6afeb7050e88d94 Mon Sep 17 00:00:00 2001 From: PONS Date: Mon, 18 Aug 2025 13:25:29 +0200 Subject: [PATCH 02/13] Added trace --- atgpu/PyInterface.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/atgpu/PyInterface.cpp b/atgpu/PyInterface.cpp index 539b78b40..85ace2131 100755 --- a/atgpu/PyInterface.cpp +++ b/atgpu/PyInterface.cpp @@ -309,7 +309,8 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) { try { if( verbose ) - cout << "Tracking " << num_particles << " particles for " << num_turns << " turns on " << gpuLattice->getGPUContext()->name() << " #" << gpuId << endl; + cout << "Tracking " << num_particles << " particles for " << num_turns << " turns on " << + gpuLattice->getGPUContext()->name() << " #" << gpuId << " Integrator: #" << integratorType << endl; npy_intp outdims[4] = {6,(npy_intp)(num_particles),num_refs,num_turns}; PyObject *rout = PyArray_EMPTY(4, outdims, floatType, 1); From 724a8b4ed8876cd59e016d4f3b654327ada14d28 Mon Sep 17 00:00:00 2001 From: PONS Date: Tue, 19 Aug 2025 08:40:12 +0200 Subject: [PATCH 03/13] Optimized projection for boundary search (GridMode RADIAL or CARTESIAN) --- atgpu/Lattice.cpp | 28 ++----- atgpu/Lattice.h | 2 +- atgpu/PyInterface.cpp | 42 +++++++--- atgpu/main.cpp | 12 +-- pyat/at.c | 142 ++++++++++++++++++++------------- pyat/at/acceptance/boundary.py | 18 +++-- 6 files changed, 136 insertions(+), 108 deletions(-) diff --git a/atgpu/Lattice.cpp b/atgpu/Lattice.cpp index 3e63fc815..09c9e104e 100755 --- a/atgpu/Lattice.cpp +++ b/atgpu/Lattice.cpp @@ -200,7 +200,6 @@ void Lattice::generateGPUKernel() { // GPU main track function code.append(kType + "void track(" + mType + "RING_PARAM *ringParam," + mType + "ELEMENT* gpuRing,\n" " uint32_t startElem,uint32_t nbElementToProcess,\n" - " " + mType + "int32_t *elemOffsets, uint32_t offsetStride,\n" " uint32_t nbPart," + mType + "AT_FLOAT* rin," + mType + "AT_FLOAT* rout,\n" " " + mType + "uint32_t* lost, uint32_t turn,\n" " " + mType + "int32_t *refpts, uint32_t nbRef,\n" @@ -228,12 +227,9 @@ void Lattice::generateGPUKernel() { code.append(" sr6[4] = rin[4 + 6*threadId];\n"); // d = (pz-p0)/p0 code.append(" sr6[5] = rin[5 + 6*threadId];\n"); // c.tau (time lag) - // The tracking starts at elem elemOffset (equivalent to lattice.rotate(elemOffset)) - code.append(" int32_t elemOffset = elemOffsets?elemOffsets[threadId/offsetStride]:0;\n"); - // Loop over elements code.append(" while(!pLost && elemType) {\n"); factory.generatePassMethodsCalls(code); @@ -307,7 +303,7 @@ uint64_t Lattice::fillGPUMemory() { } void Lattice::run(uint64_t nbTurn,uint32_t nbParticles,AT_FLOAT *rin,AT_FLOAT *rout,uint32_t nbRef, - uint32_t *refPts,uint32_t nbElemOffset,uint32_t *elemOffsets, + uint32_t *refPts,uint32_t startElem,uint32_t endElem, uint32_t *lostAtTurn,uint32_t *lostAtElem,AT_FLOAT *lostAtCoord, bool updateRin) { @@ -371,28 +367,14 @@ void Lattice::run(uint64_t nbTurn,uint32_t nbParticles,AT_FLOAT *rin,AT_FLOAT *r if(lostAtElem) gpu->allocDevice(&gpuLostAtElem, nbParticles * sizeof(uint32_t)); if(lostAtCoord) gpu->allocDevice(&gpuLostAtCoord, nbParticles * 6 * sizeof(AT_FLOAT)); - // Starting element (lattice rotation) - void *gpuOffsets = nullptr; - uint32_t offsetStride = 0; - if(nbElemOffset) { - gpu->allocDevice(&gpuOffsets, nbElemOffset * sizeof(int32_t)); - gpu->hostToDevice(gpuOffsets, elemOffsets, nbElemOffset * sizeof(int32_t)); - // nbParticles % nbElemOffset must be 0 - // offsetStride (nbParticles/nbElemOffset) should be a multiple of 64 - offsetStride = nbParticles / nbElemOffset; - } - // Call GPU gpu->resetArg(); - uint32_t startElem; - uint32_t nbElemToProcess = (uint32_t)elements.size(); + uint32_t nbElemToProcess = (endElem+1) - startElem; uint32_t turn; gpu->addArg(sizeof(void *),&gpuRingParams); // Global ring params gpu->addArg(sizeof(void *),&gpuRing); // The lattice gpu->addArg(sizeof(uint32_t),&startElem); // Process from gpu->addArg(sizeof(uint32_t),&nbElemToProcess); // Number of element to process - gpu->addArg(sizeof(void *),&gpuOffsets); // Tracking start offsets - gpu->addArg(sizeof(uint32_t),&offsetStride); // Number of particle which starts at elem specified in gpuOffsets gpu->addArg(sizeof(uint32_t),&nbParticles); // Total number of particle to track gpu->addArg(sizeof(void *),&gpuRin); // Input 6D coordinates gpu->addArg(sizeof(void *),&gpuRout); // Output 6D coordinates @@ -405,8 +387,9 @@ void Lattice::run(uint64_t nbTurn,uint32_t nbParticles,AT_FLOAT *rin,AT_FLOAT *r // Turn loop for(turn=0;turnrun(nbParticles); // 1 particle per thread + startElem = 0; + nbElemToProcess = (uint32_t)elements.size(); } // Get back data @@ -441,7 +424,6 @@ void Lattice::run(uint64_t nbTurn,uint32_t nbParticles,AT_FLOAT *rin,AT_FLOAT *r gpu->freeDevice(gpuRingParams); if( gpuLostAtElem ) gpu->freeDevice(gpuLostAtElem); if( gpuLostAtCoord ) gpu->freeDevice(gpuLostAtCoord); - if( gpuOffsets ) gpu->freeDevice(gpuOffsets); delete[] expandedRefPts; delete[] lost; diff --git a/atgpu/Lattice.h b/atgpu/Lattice.h index e3e2b5bed..e5785854c 100644 --- a/atgpu/Lattice.h +++ b/atgpu/Lattice.h @@ -27,7 +27,7 @@ class Lattice { void generateGPUKernel(); // Run the simulation void run(uint64_t nbTurn,uint32_t nbParticles,AT_FLOAT *rin,AT_FLOAT *rout,uint32_t nbRef,uint32_t *refPts, - uint32_t nbElemOffset,uint32_t *elemOffsets,uint32_t *lostAtTurn,uint32_t *lostAtElem,AT_FLOAT *lostAtCoord, + uint32_t startElem,uint32_t endElem,uint32_t *lostAtTurn,uint32_t *lostAtElem,AT_FLOAT *lostAtCoord, bool updateRin); // Return handle to the GPU context GPUContext *getGPUContext(); diff --git a/atgpu/PyInterface.cpp b/atgpu/PyInterface.cpp index 85ace2131..42575a24c 100755 --- a/atgpu/PyInterface.cpp +++ b/atgpu/PyInterface.cpp @@ -41,18 +41,15 @@ static PyMethodDef AtGPUMethods[] = { " reuse: if True, use previously cached description of the lattice.\n" " losses: if True, process losses\n" " gpu_pool: List of GPU id to use\n" - " tracking_starts: numpy array of indices of elements where tracking should start.\n" - " len(tracking_start) must divide the number of particle and it gives the stride size.\n" - " The i-th particle of rin starts at elem tracking_start[i/stride].\n" - " The behavior is similar to lattice.rotate(tracking_starts[i/stride]).\n" - " The stride size should be multiple of 64 for best performance.\n" " integrator: Type of integrator to use.\n" " 1: Euler 1st order, 1 drift/1 kick per step.\n" " 2: Mclachlan 2nd order, 2 drift/2 kicks per step.\n" " 3: Ruth 3rd order, 3 drifts/3 kicks per step.\n" " 4: Forest/Ruth 4th order, 4 drifts/3 kicks per step (Default).\n" " 5: Optimal 4th order from R. Mclachlan, 4 drifts/4 kicks per step.\n" - " 6: Yoshida 6th order, 8 drifts/7 kicks per step.\n\n" + " 6: Yoshida 6th order, 8 drifts/7 kicks per step.\n" + " start_elem: start tracking at element #start_elem\n" + " end_elem: end tracking at element #end_elem (inclusive)\n\n" "Returns:\n" " rout: 6 x n_particles x n_refpts x n_turns Fortran-ordered numpy array\n" " of particle coordinates\n\n" @@ -138,7 +135,8 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) { "energy", "particle", "keep_counter", "reuse","losses", "bunch_spos", "bunch_currents", "gpu_pool", - "tracking_starts","integrator","verbose", + "integrator","verbose", + "start_elem","end_elem", nullptr}; NPY_TYPES floatType; @@ -168,10 +166,12 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) { int counter=0; int losses=0; int integratorType=4; + int startElem=0; + int endElem=-1; double t0,t1; // Get input args - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!O!i|O!$iO!O!pppO!O!O!O!ip", const_cast(kwlist), + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!O!i|O!$iO!O!pppO!O!O!ipii", const_cast(kwlist), &PyList_Type, &lattice, &PyArray_Type, &rin, &num_turns, @@ -185,9 +185,10 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) { &PyArray_Type, &bspos, &PyArray_Type, &bcurrents, &PyList_Type, &gpupool, - &PyArray_Type, &trackstarts, &integratorType, - &verbose + &verbose, + &startElem, + &endElem )) { return nullptr; } @@ -292,6 +293,23 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) { } + int num_elements = gpuLattice->getNbElement(); + + // Check for partial turn tracking + if(endElem==-1) endElem = num_elements-1; + if(startElem<0 || startElem>=num_elements) { + return PyErr_Format(PyExc_ValueError, "start_elem out of range [0..num_elements-1]"); + } + if(endElem<0 || endElem>=num_elements) { + return PyErr_Format(PyExc_ValueError, "end_elem out of range [0..num_elements-1]"); + } + if(startElem>endElem) { + return PyErr_Format(PyExc_ValueError, "end_elem must be greater that start_elem"); + } + if((endElem!=num_elements-1) && (num_turns!=1)) { + return PyErr_Format(PyExc_ValueError, "partial turn tracking not ending at the end of the ring requires nturns=1"); + } + // Load lattice on the GPU try { uint64_t size = gpuLattice->fillGPUMemory(); @@ -331,7 +349,7 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) { bool *xlostPtr = (bool *)PyArray_DATA((PyArrayObject *)xlost); AT_FLOAT *xlostcoordPtr = (AT_FLOAT *)PyArray_DATA((PyArrayObject *)xlostcoord); - gpuLattice->run(num_turns,num_particles,drin,drout,num_refs,ref_pts,num_starts,track_starts, + gpuLattice->run(num_turns,num_particles,drin,drout,num_refs,ref_pts,startElem,endElem, xnturnPtr,xnelemPtr,xlostcoordPtr,true); // Format result for AT @@ -356,7 +374,7 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) { } else { - gpuLattice->run(num_turns,num_particles,drin,drout,num_refs,ref_pts,num_starts,track_starts, + gpuLattice->run(num_turns,num_particles,drin,drout,num_refs,ref_pts,startElem,endElem, nullptr,nullptr,nullptr,true); return rout; diff --git a/atgpu/main.cpp b/atgpu/main.cpp index 1e3a85e2b..0630df41f 100755 --- a/atgpu/main.cpp +++ b/atgpu/main.cpp @@ -134,7 +134,7 @@ void integratorTest(int gpu,string latticeName) { // Choose an arc close to 1mm where unexpected tune drift is observed when step size in too small (EBS lattice) AT_FLOAT *rin = createArc(0.001,M_PI/2.0,-M_PI/2.0,nbPart); - l->run(nbTurn, nbPart, rin, rout, nbRef, refs, 0, nullptr, nullptr, nullptr, nullptr, false); + l->run(nbTurn, nbPart, rin, rout, nbRef, refs, 0, l->getNbElement()-1, nullptr, nullptr, nullptr, false); double err = 0; double max = 0; @@ -209,9 +209,6 @@ void performanceTest(int gpu,string latticeName) { uint32_t nbPart = nbX * nbY; uint32_t refs[] = {l->getNbElement()}; uint32_t nbRef = sizeof(refs) / sizeof(uint32_t); - uint32_t starts[] = {0,100,200,300,400,500,600,700}; - uint32_t nbStride = sizeof(starts) / sizeof(uint32_t); - //uint32_t nbStride = 0; uint64_t routSize = nbTurn * nbPart * nbRef * 6 * sizeof(AT_FLOAT); AT_FLOAT *rout = (AT_FLOAT *) malloc(routSize); @@ -222,7 +219,7 @@ void performanceTest(int gpu,string latticeName) { AT_FLOAT *lostAtCoord = new AT_FLOAT[nbPart * 6]; t0 = AbstractGPU::get_ticks(); - l->run(nbTurn, nbPart, rin, rout, nbRef, refs, nbStride, starts, lostAtTurn, lostAtElem, lostAtCoord,false); + l->run(nbTurn, nbPart, rin, rout, nbRef, refs, 0, l->getNbElement()-1, lostAtTurn, lostAtElem, lostAtCoord,false); t1 = AbstractGPU::get_ticks(); AT_FLOAT *P = &rin[114*6]; @@ -231,9 +228,8 @@ void performanceTest(int gpu,string latticeName) { cout << "Lost elem:" << lostAtElem[114] << endl; cout << "Lost coord:" << lostAtCoord[114*6] << "," << lostAtCoord[114*6+2] << endl; - uint32_t strideSize = nbPart / nbStride; - for(int stride=0; stride < nbStride; stride++) { - P = ROUTPTR(stride*strideSize, 0, nbTurn - 1); + for(int stride=0; stride < 8; stride++) { + P = ROUTPTR(stride*8, 0, nbTurn - 1); cout << "[" << stride << "] " << P[0] << " " << P[1] << " " << P[2] << " " << P[3] << " " << P[4] << " " << P[5] << endl; } diff --git a/pyat/at.c b/pyat/at.c index 8a7d673a2..2bc7c8c7b 100644 --- a/pyat/at.c +++ b/pyat/at.c @@ -332,12 +332,14 @@ void set_current_fillpattern(PyArrayObject *bspos, PyArrayObject *bcurrents, * - nturns: int number of turns to simulate * - refpts: numpy uint32 array denoting elements at which to return state * - reuse: whether to reuse the cached state of the ring + * - start_elem: start tracking at element #start_elem + * - end_elem: end tracking at element #end_elem (inclusive) */ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) { static char *kwlist[] = {"line","rin","nturns","refpts","turn", "energy", "particle", "keep_counter", "reuse","omp_num_threads","losses", - "bunch_spos", "bunch_currents", NULL}; + "bunch_spos", "bunch_currents", "start_elem", "end_elem", NULL}; static double lattice_length = 0.0; static int last_turn = 0; static int valid = 0; @@ -363,6 +365,8 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) { npy_uint32 omp_num_threads=0; npy_uint32 num_particles, np6; npy_uint32 elem_index; + npy_uint32 start_elem = 0; + npy_uint32 end_elem = INT32_MAX; npy_uint32 *refpts = NULL; npy_uint32 nextref; unsigned int nextrefindex; @@ -387,12 +391,13 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) { bspos=NULL; bcurrents=NULL; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!O!i|O!$iO!O!ppIpO!O!", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!O!i|O!$iO!O!ppIpO!O!ii", kwlist, &PyList_Type, &lattice, &PyArray_Type, &rin, &num_turns, &PyArray_Type, &refs, &counter, &PyFloat_Type ,&energy, particle_type, &particle, &keep_counter, &keep_lattice, &omp_num_threads, &losses, - &PyArray_Type, &bspos, &PyArray_Type, &bcurrents)) { + &PyArray_Type, &bspos, &PyArray_Type, &bcurrents, + &start_elem,&end_elem)) { return NULL; } if (PyArray_DIM(rin,0) != 6) { @@ -411,61 +416,14 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) { param.rest_energy=0.0; param.charge=-1.0; param.num_turns=num_turns; - param.bdiff = NULL; - + param.bdiff = NULL; if (keep_counter) param.nturn = last_turn; else param.nturn = counter; - set_energy_particle(lattice, energy, particle, ¶m); set_current_fillpattern(bspos, bcurrents, ¶m); - num_particles = (PyArray_SIZE(rin)/6); - np6 = num_particles*6; - drin = PyArray_DATA(rin); - - if (refs) { - if (PyArray_TYPE(refs) != NPY_UINT32) { - return PyErr_Format(PyExc_ValueError, "refpts is not a uint32 array"); - } - refpts = PyArray_DATA(refs); - num_refpts = PyArray_SIZE(refs); - } - else { - refpts = NULL; - num_refpts = 0; - } - outdims[0] = 6; - outdims[1] = num_particles; - outdims[2] = num_refpts; - outdims[3] = num_turns; - rout = PyArray_EMPTY(4, outdims, NPY_DOUBLE, 1); - drout = PyArray_DATA((PyArrayObject *)rout); - - if(losses){ - pdims[0]= num_particles; - lxdims[0]= 6; - lxdims[1]= num_particles; - xnturn = PyArray_EMPTY(1, pdims, NPY_UINT32, 1); - xnelem = PyArray_EMPTY(1, pdims, NPY_UINT32, 1); - xlost = PyArray_EMPTY(1, pdims, NPY_BOOL, 1); - xlostcoord = PyArray_EMPTY(2, lxdims, NPY_DOUBLE, 1); - ixnturn = PyArray_DATA((PyArrayObject *)xnturn); - ixnelem = PyArray_DATA((PyArrayObject *)xnelem); - bxlost = PyArray_DATA((PyArrayObject *)xlost); - dxlostcoord = PyArray_DATA((PyArrayObject *)xlostcoord); - unsigned int i; - static double r0[6]; - for(i=0;i 0) && (num_particles > OMP_PARTICLE_THRESHOLD)) { unsigned int nthreads = omp_get_num_procs(); @@ -544,18 +502,79 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) { valid = 0; } + /* Check for partial turn tracking */ + if(end_elem==INT32_MAX) end_elem = num_elements-1; + if(start_elem<0 || start_elem>=num_elements) { + return PyErr_Format(PyExc_ValueError, "start_elem out of range [0..num_elements-1]"); + } + if(end_elem<0 || end_elem>=num_elements) { + return PyErr_Format(PyExc_ValueError, "end_elem out of range [0..num_elements-1]"); + } + if(start_elem>end_elem) { + return PyErr_Format(PyExc_ValueError, "end_elem must be greater that start_elem"); + } + if((end_elem!=num_elements-1) && (num_turns!=1)) { + return PyErr_Format(PyExc_ValueError, "partial turn tracking not ending at the end of the ring requires nturns=1"); + } + + /* Particle energy */ param.RingLength = lattice_length; if (param.rest_energy == 0.0) { param.T0 = param.RingLength/C0; - } - else { + } else { double gamma0 = param.energy/param.rest_energy; double betagamma0 = sqrt(gamma0*gamma0 - 1.0); double beta0 = betagamma0/gamma0; param.T0 = param.RingLength/beta0/C0; } + set_energy_particle(lattice, energy, particle, ¶m); + + /* Memory allocation for outputs */ + num_particles = (PyArray_SIZE(rin)/6); + np6 = num_particles*6; + drin = PyArray_DATA(rin); + + if (refs) { + if (PyArray_TYPE(refs) != NPY_UINT32) { + return PyErr_Format(PyExc_ValueError, "refpts is not a uint32 array"); + } + refpts = PyArray_DATA(refs); + num_refpts = PyArray_SIZE(refs); + } else { + refpts = NULL; + num_refpts = 0; + } + outdims[0] = 6; + outdims[1] = num_particles; + outdims[2] = num_refpts; + outdims[3] = num_turns; + rout = PyArray_EMPTY(4, outdims, NPY_DOUBLE, 1); + drout = PyArray_DATA((PyArrayObject *)rout); + + if(losses){ + pdims[0]= num_particles; + lxdims[0]= 6; + lxdims[1]= num_particles; + xnturn = PyArray_EMPTY(1, pdims, NPY_UINT32, 1); + xnelem = PyArray_EMPTY(1, pdims, NPY_UINT32, 1); + xlost = PyArray_EMPTY(1, pdims, NPY_BOOL, 1); + xlostcoord = PyArray_EMPTY(2, lxdims, NPY_DOUBLE, 1); + ixnturn = PyArray_DATA((PyArrayObject *)xnturn); + ixnelem = PyArray_DATA((PyArrayObject *)xnelem); + bxlost = PyArray_DATA((PyArrayObject *)xlost); + dxlostcoord = PyArray_DATA((PyArrayObject *)xlostcoord); + unsigned int i; + static double r0[6]; + for(i=0;i Date: Tue, 19 Aug 2025 09:29:17 +0200 Subject: [PATCH 04/13] Fix for empty lattice --- atgpu/Lattice.cpp | 2 +- atgpu/PyInterface.cpp | 35 ++++++++++++++++++++++------------- pyat/at.c | 37 +++++++++++++++++++++++-------------- 3 files changed, 46 insertions(+), 28 deletions(-) diff --git a/atgpu/Lattice.cpp b/atgpu/Lattice.cpp index 09c9e104e..d01baf7fe 100755 --- a/atgpu/Lattice.cpp +++ b/atgpu/Lattice.cpp @@ -369,7 +369,7 @@ void Lattice::run(uint64_t nbTurn,uint32_t nbParticles,AT_FLOAT *rin,AT_FLOAT *r // Call GPU gpu->resetArg(); - uint32_t nbElemToProcess = (endElem+1) - startElem; + uint32_t nbElemToProcess = endElem - startElem; uint32_t turn; gpu->addArg(sizeof(void *),&gpuRingParams); // Global ring params gpu->addArg(sizeof(void *),&gpuRing); // The lattice diff --git a/atgpu/PyInterface.cpp b/atgpu/PyInterface.cpp index 42575a24c..60782e0d4 100755 --- a/atgpu/PyInterface.cpp +++ b/atgpu/PyInterface.cpp @@ -49,7 +49,7 @@ static PyMethodDef AtGPUMethods[] = { " 5: Optimal 4th order from R. Mclachlan, 4 drifts/4 kicks per step.\n" " 6: Yoshida 6th order, 8 drifts/7 kicks per step.\n" " start_elem: start tracking at element #start_elem\n" - " end_elem: end tracking at element #end_elem (inclusive)\n\n" + " end_elem: end tracking at element #end_elem (exclusive)\n\n" "Returns:\n" " rout: 6 x n_particles x n_refpts x n_turns Fortran-ordered numpy array\n" " of particle coordinates\n\n" @@ -296,18 +296,27 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) { int num_elements = gpuLattice->getNbElement(); // Check for partial turn tracking - if(endElem==-1) endElem = num_elements-1; - if(startElem<0 || startElem>=num_elements) { - return PyErr_Format(PyExc_ValueError, "start_elem out of range [0..num_elements-1]"); - } - if(endElem<0 || endElem>=num_elements) { - return PyErr_Format(PyExc_ValueError, "end_elem out of range [0..num_elements-1]"); - } - if(startElem>endElem) { - return PyErr_Format(PyExc_ValueError, "end_elem must be greater that start_elem"); - } - if((endElem!=num_elements-1) && (num_turns!=1)) { - return PyErr_Format(PyExc_ValueError, "partial turn tracking not ending at the end of the ring requires nturns=1"); + if(endElem==-1) endElem = num_elements; + if(num_elements!=0) { + if(startElem<0 || startElem>=num_elements) { + char errMsg[64]; + sprintf(errMsg,"start_elem out of range, %d not in [0..%d]",startElem,num_elements-1); + return PyErr_Format(PyExc_ValueError, errMsg); + } + if(endElem<0 || endElem>num_elements) { + char errMsg[64]; + sprintf(errMsg,"end_elem out of range, %d not in [0..%d]",endElem,num_elements); + return PyErr_Format(PyExc_ValueError, errMsg); + } + if(startElem>endElem) { + return PyErr_Format(PyExc_ValueError, "end_elem must be greater that start_elem"); + } + if((endElem!=num_elements) && (num_turns!=1)) { + return PyErr_Format(PyExc_ValueError, "partial turn tracking not ending at the end of the ring requires nturns=1"); + } + } else { + startElem = 0; + endElem = 0; } // Load lattice on the GPU diff --git a/pyat/at.c b/pyat/at.c index 2bc7c8c7b..5c96b1455 100644 --- a/pyat/at.c +++ b/pyat/at.c @@ -333,7 +333,7 @@ void set_current_fillpattern(PyArrayObject *bspos, PyArrayObject *bcurrents, * - refpts: numpy uint32 array denoting elements at which to return state * - reuse: whether to reuse the cached state of the ring * - start_elem: start tracking at element #start_elem - * - end_elem: end tracking at element #end_elem (inclusive) + * - end_elem: end tracking at element #end_elem (exclusive) */ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) { static char *kwlist[] = {"line","rin","nturns","refpts","turn", @@ -503,18 +503,27 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) { } /* Check for partial turn tracking */ - if(end_elem==INT32_MAX) end_elem = num_elements-1; - if(start_elem<0 || start_elem>=num_elements) { - return PyErr_Format(PyExc_ValueError, "start_elem out of range [0..num_elements-1]"); - } - if(end_elem<0 || end_elem>=num_elements) { - return PyErr_Format(PyExc_ValueError, "end_elem out of range [0..num_elements-1]"); - } - if(start_elem>end_elem) { - return PyErr_Format(PyExc_ValueError, "end_elem must be greater that start_elem"); - } - if((end_elem!=num_elements-1) && (num_turns!=1)) { - return PyErr_Format(PyExc_ValueError, "partial turn tracking not ending at the end of the ring requires nturns=1"); + if(end_elem==INT32_MAX) end_elem = num_elements; + if(num_elements!=0) { + if(start_elem<0 || start_elem>=num_elements) { + char errMsg[64]; + sprintf(errMsg,"start_elem out of range, %d not in [0..%d]",start_elem,num_elements-1); + return PyErr_Format(PyExc_ValueError, errMsg); + } + if(end_elem<0 || end_elem>num_elements) { + char errMsg[64]; + sprintf(errMsg,"end_elem out of range, %d not in [0..%d]",start_elem,num_elements); + return PyErr_Format(PyExc_ValueError, errMsg); + } + if(start_elem>end_elem) { + return PyErr_Format(PyExc_ValueError, "end_elem must be greater that start_elem"); + } + if((end_elem!=num_elements) && (num_turns!=1)) { + return PyErr_Format(PyExc_ValueError, "partial turn tracking not ending at the end of the ring requires nturns=1"); + } + } else { + start_elem = 0; + end_elem = 0; } /* Particle energy */ @@ -597,7 +606,7 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) { /*PySys_WriteStdout("turn: %i\n", param.nturn);*/ nextrefindex = 0; nextref= (nextrefindex Date: Tue, 19 Aug 2025 09:55:09 +0200 Subject: [PATCH 05/13] Fix wrong memory freeing --- pyat/at.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyat/at.c b/pyat/at.c index 5c96b1455..dff4d76e3 100644 --- a/pyat/at.c +++ b/pyat/at.c @@ -479,10 +479,10 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) { PyObject *el = PyList_GET_ITEM(lattice, elem_index); PyObject *PyPassMethod = PyObject_GetAttrString(el, "PassMethod"); double length; - if (!PyPassMethod) return print_error(elem_index, rout); /* No PassMethod: AttributeError */ + if (!PyPassMethod) return NULL; /* No PassMethod: AttributeError */ LibraryListPtr = get_track_function(PyUnicode_AsUTF8(PyPassMethod)); Py_DECREF(PyPassMethod); - if (!LibraryListPtr) return print_error(elem_index, rout); /* No trackFunction for the given PassMethod: RuntimeError */ + if (!LibraryListPtr) return NULL; /* No trackFunction for the given PassMethod: RuntimeError */ pylength = PyObject_GetAttrString(el, "Length"); if (pylength) { length = PyFloat_AsDouble(pylength); From bdd31d3668aa8aaedbf60010984d604ca3e3a26a Mon Sep 17 00:00:00 2001 From: PONS Date: Wed, 20 Aug 2025 09:44:49 +0200 Subject: [PATCH 06/13] Run black --- pyat/at/acceptance/boundary.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pyat/at/acceptance/boundary.py b/pyat/at/acceptance/boundary.py index 9ed9100ad..b2b9d5f55 100644 --- a/pyat/at/acceptance/boundary.py +++ b/pyat/at/acceptance/boundary.py @@ -342,9 +342,9 @@ def grid_boundary_search( grids = [] if offset is None: - _,offsets = ring.find_orbit(dp=dp,refpts=obspt) + _, offsets = ring.find_orbit(dp=dp, refpts=obspt) else: - offsets = offset + offsets = offset for i, obs, orbit in zip(numpy.arange(len(obspt)), obspt, offsets): parts, grid = get_parts(config, orbit) @@ -358,7 +358,13 @@ def grid_boundary_search( ) ) ring.track( - parts, use_mp=use_mp, in_place=True, refpts=None,start_elem=obs,keep_lattice=True,**kwargs + parts, + use_mp=use_mp, + in_place=True, + refpts=None, + start_elem=obs, + keep_lattice=True, + **kwargs, ) allparts.append(parts) grids.append(grid) @@ -401,7 +407,6 @@ def recursive_boundary_search( def search_boundary( planesi, angles, rtol, rsteps, nturns, offset, use_mp, **kwargs ): - ftol = min(rtol / rsteps) cs = numpy.squeeze([numpy.cos(angles), numpy.sin(angles)]) cs = numpy.around(cs, decimals=9) From baca5738a73ac2de03e6e49ab01d8d99db5ffdbc Mon Sep 17 00:00:00 2001 From: PONS Date: Wed, 20 Aug 2025 10:14:11 +0200 Subject: [PATCH 07/13] end_elem initialized to 0 --- pyat/at.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyat/at.c b/pyat/at.c index dff4d76e3..072ae2efd 100644 --- a/pyat/at.c +++ b/pyat/at.c @@ -366,7 +366,7 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) { npy_uint32 num_particles, np6; npy_uint32 elem_index; npy_uint32 start_elem = 0; - npy_uint32 end_elem = INT32_MAX; + npy_uint32 end_elem = 0; npy_uint32 *refpts = NULL; npy_uint32 nextref; unsigned int nextrefindex; @@ -503,7 +503,7 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) { } /* Check for partial turn tracking */ - if(end_elem==INT32_MAX) end_elem = num_elements; + if(end_elem==0) end_elem = num_elements; if(num_elements!=0) { if(start_elem<0 || start_elem>=num_elements) { char errMsg[64]; From 4057b4792675fe98fac62a1283d371cac10c1c83 Mon Sep 17 00:00:00 2001 From: PONS Date: Wed, 20 Aug 2025 10:14:27 +0200 Subject: [PATCH 08/13] Updated doc --- pyat/at/tracking/track.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyat/at/tracking/track.py b/pyat/at/tracking/track.py index 55c249a33..6eaaa9782 100755 --- a/pyat/at/tracking/track.py +++ b/pyat/at/tracking/track.py @@ -233,6 +233,12 @@ def lattice_track( calculation or to solve Runtime Errors, however it is considered unsafe. Used only when *use_mp* is :py:obj:`True`. It can be globally set using the variable *at.lattice.DConstant.patpass_startmethod* + start_elem (int): Start tracking at the specified element. The + behavior is different from :py:func:`..lattice.rotate`, refpts are + still indexed from the begininig of the ring. Default 0. + end_elem (int): End tracking at the specified element (exclusive). + When end_elem is different from the number of lattice elements, + :pycode:`nturns=1` is required. Default number of elements. The following keyword arguments overload the lattice values From bbea829159ba479d66d762bac9368bed0d76af14 Mon Sep 17 00:00:00 2001 From: PONS Date: Wed, 20 Aug 2025 10:31:57 +0200 Subject: [PATCH 09/13] Updated stubs --- pyat/at/tracking/atpass.pyi | 2 ++ pyat/at/tracking/gpu.pyi | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pyat/at/tracking/atpass.pyi b/pyat/at/tracking/atpass.pyi index 8c882b8ca..081e99f24 100644 --- a/pyat/at/tracking/atpass.pyi +++ b/pyat/at/tracking/atpass.pyi @@ -22,6 +22,8 @@ def atpass( losses: bool = False, bunch_spos=None, bunch_current=None, + start_elem: int | None = None, + end_elem: int | None = None, ): ... def elempass( element: Element, diff --git a/pyat/at/tracking/gpu.pyi b/pyat/at/tracking/gpu.pyi index c17281dba..b8948776b 100644 --- a/pyat/at/tracking/gpu.pyi +++ b/pyat/at/tracking/gpu.pyi @@ -21,7 +21,8 @@ def gpupass( bunch_spos=None, bunch_current=None, gpu_pool: list[int] | None = None, - tracking_starts=None, integrator=4, - verbose: bool = False + verbose: bool = False, + start_elem: int | None = None, + end_elem: int | None = None ): ... From 2a9141d8d94f40a90ab378762565d152897923c2 Mon Sep 17 00:00:00 2001 From: PONS Date: Wed, 20 Aug 2025 10:54:12 +0200 Subject: [PATCH 10/13] Added unit test --- pyat/test/test_atpass.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pyat/test/test_atpass.py b/pyat/test/test_atpass.py index 719924dbc..0ef2c1c04 100644 --- a/pyat/test/test_atpass.py +++ b/pyat/test/test_atpass.py @@ -153,3 +153,19 @@ def test_1d_particle(): rout_expected = numpy.array([1e-6, 1e-6, 0, 0, 0, 5e-13]) # rin is changed in place numpy.testing.assert_equal(rin, rout_expected) + +def test_start_elem(hmba_lattice): + hmba_lattice = hmba_lattice.radiation_off(copy=True) + rin = numpy.array([0,1e-6,0,0,0,0]) + rout = atpass(hmba_lattice, rin, 1, start_elem=100) + assert rout.shape == (6, 1, 0, 1) + expected = numpy.array([ 3.2e-6, -1.1e-6, 0, 0,0, 8.4e-8]) + numpy.testing.assert_allclose(rin, expected, atol=1e-7) + +def test_end_elem(hmba_lattice): + hmba_lattice = hmba_lattice.radiation_off(copy=True) + rin = numpy.array([0,1e-6,0,0,0,0]) + rout = atpass(hmba_lattice, rin, 1, start_elem=100, end_elem=110) + assert rout.shape == (6, 1, 0, 1) + expected = numpy.array([2.6e-6, 1.34e-6, 0, 0,0, 2.4e-8]) + numpy.testing.assert_allclose(rin, expected, atol=1e-7) From 4ecb09a2acbdaf3d14bb084dc4a17b26dcb81b00 Mon Sep 17 00:00:00 2001 From: PONS Date: Wed, 20 Aug 2025 14:35:53 +0200 Subject: [PATCH 11/13] Updated doc --- atgpu/PyInterface.cpp | 9 +++++++-- pyat/at/tracking/track.py | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/atgpu/PyInterface.cpp b/atgpu/PyInterface.cpp index 60782e0d4..1bb62e1e6 100755 --- a/atgpu/PyInterface.cpp +++ b/atgpu/PyInterface.cpp @@ -48,8 +48,13 @@ static PyMethodDef AtGPUMethods[] = { " 4: Forest/Ruth 4th order, 4 drifts/3 kicks per step (Default).\n" " 5: Optimal 4th order from R. Mclachlan, 4 drifts/4 kicks per step.\n" " 6: Yoshida 6th order, 8 drifts/7 kicks per step.\n" - " start_elem: start tracking at element #start_elem\n" - " end_elem: end tracking at element #end_elem (exclusive)\n\n" + " start_elem: Start tracking at the specified element. The\n" + " behavior is different from :py:func:`..lattice.rotate`, refpts are\n" + " still indexed from the begininig of the ring and s_coord starts at\n" + " the beginning of the ring. Default 0.\n" + " end_elem: End tracking at the specified element (exclusive).\n" + " When end_elem is different from the number of lattice elements,\n" + " nturns=1 is required. Default number of elements.\n\n" "Returns:\n" " rout: 6 x n_particles x n_refpts x n_turns Fortran-ordered numpy array\n" " of particle coordinates\n\n" diff --git a/pyat/at/tracking/track.py b/pyat/at/tracking/track.py index 6eaaa9782..d39d2c2c9 100755 --- a/pyat/at/tracking/track.py +++ b/pyat/at/tracking/track.py @@ -235,7 +235,8 @@ def lattice_track( set using the variable *at.lattice.DConstant.patpass_startmethod* start_elem (int): Start tracking at the specified element. The behavior is different from :py:func:`..lattice.rotate`, refpts are - still indexed from the begininig of the ring. Default 0. + still indexed from the begininig of the ring and s_coord starts at + the beginning of the ring. Default 0. end_elem (int): End tracking at the specified element (exclusive). When end_elem is different from the number of lattice elements, :pycode:`nturns=1` is required. Default number of elements. From 9a819343b96dc3f55d57626ec52b687a89cfdade Mon Sep 17 00:00:00 2001 From: PONS Date: Thu, 21 Aug 2025 14:34:38 +0200 Subject: [PATCH 12/13] Added unit test for errors --- pyat/test/test_atpass.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pyat/test/test_atpass.py b/pyat/test/test_atpass.py index 0ef2c1c04..cff680699 100644 --- a/pyat/test/test_atpass.py +++ b/pyat/test/test_atpass.py @@ -169,3 +169,16 @@ def test_end_elem(hmba_lattice): assert rout.shape == (6, 1, 0, 1) expected = numpy.array([2.6e-6, 1.34e-6, 0, 0,0, 2.4e-8]) numpy.testing.assert_allclose(rin, expected, atol=1e-7) + +def test_start_end_elem_errors(hmba_lattice): + hmba_lattice = hmba_lattice.radiation_off(copy=True) + rin = numpy.array([0,1e-6,0,0,0,0]) + with pytest.raises(ValueError): + rout = atpass(hmba_lattice, rin, 1, start_elem=200) + with pytest.raises(ValueError): + rout = atpass(hmba_lattice, rin, 1, end_elem=200) + with pytest.raises(ValueError): + rout = atpass(hmba_lattice, rin, 1, start_elem=100, end_elem=10) + with pytest.raises(ValueError): + rout = atpass(hmba_lattice, rin, 2, end_elem=10) + From d6616de4bae4736dac94fd892a6d7f88abbe9cd9 Mon Sep 17 00:00:00 2001 From: PONS Date: Thu, 21 Aug 2025 14:35:05 +0200 Subject: [PATCH 13/13] Updated doc --- pyat/at.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pyat/at.c b/pyat/at.c index 072ae2efd..778790acf 100644 --- a/pyat/at.c +++ b/pyat/at.c @@ -872,7 +872,14 @@ static PyMethodDef AtMethods[] = { " particle (Optional[Particle]): circulating particle\n" " reuse: if True, use previously cached description of the lattice.\n" " omp_num_threads: number of OpenMP threads (default 0: automatic)\n" - " losses: if True, process losses\n\n" + " losses: if True, process losses\n" + " start_elem: Start tracking at the specified element. The\n" + " behavior is different from :py:func:`..lattice.rotate`, refpts are\n" + " still indexed from the begininig of the ring and s_coord starts at\n" + " the beginning of the ring. Default 0.\n" + " end_elem: End tracking at the specified element (exclusive).\n" + " When end_elem is different from the number of lattice elements,\n" + " nturns=1 is required. Default number of elements.\n\n" "Returns:\n" " rout: 6 x n_particles x n_refpts x n_turns Fortran-ordered numpy array\n" " of particle coordinates\n\n"