diff --git a/atgpu/Lattice.cpp b/atgpu/Lattice.cpp
index 3e63fc815..d01baf7fe 100755
--- a/atgpu/Lattice.cpp
+++ b/atgpu/Lattice.cpp
@@ -200,7 +200,6 @@ void Lattice::generateGPUKernel() {
   // GPU main track function
   code.append(kType + "void track(" + mType + "RING_PARAM *ringParam," + mType + "ELEMENT* gpuRing,\n"
               "                   uint32_t startElem,uint32_t nbElementToProcess,\n"
-              "                   " + mType + "int32_t *elemOffsets, uint32_t offsetStride,\n"
               "                   uint32_t nbPart," + mType + "AT_FLOAT* rin," + mType + "AT_FLOAT* rout,\n"
               "                   " + mType + "uint32_t* lost, uint32_t turn,\n"
               "                   " + mType + "int32_t *refpts, uint32_t nbRef,\n"
@@ -228,12 +227,9 @@ void Lattice::generateGPUKernel() {
   code.append("  sr6[4] = rin[4 + 6*threadId];\n"); // d = (pz-p0)/p0
   code.append("  sr6[5] = rin[5 + 6*threadId];\n"); // c.tau (time lag)
 
-  // The tracking starts at elem elemOffset (equivalent to lattice.rotate(elemOffset))
-  code.append("  int32_t elemOffset = elemOffsets?elemOffsets[threadId/offsetStride]:0;\n");
-
   // Loop over elements
   code.append("  while(!pLost && elem<startElem+nbElementToProcess) {\n");
-  code.append("    "+mType+" ELEMENT* elemPtr = gpuRing + ((elem+elemOffset)%NB_TOTAL_ELEMENT);\n");
+  code.append("    "+mType+" ELEMENT* elemPtr = gpuRing + elem;\n");
   storeParticleCoord(code);
   code.append("    switch(elemPtr->Type) {\n");
   factory.generatePassMethodsCalls(code);
@@ -307,7 +303,7 @@ uint64_t Lattice::fillGPUMemory() {
 }
 
 void Lattice::run(uint64_t nbTurn,uint32_t nbParticles,AT_FLOAT *rin,AT_FLOAT *rout,uint32_t nbRef,
-                  uint32_t *refPts,uint32_t nbElemOffset,uint32_t *elemOffsets,
+                  uint32_t *refPts,uint32_t startElem,uint32_t endElem,
                   uint32_t *lostAtTurn,uint32_t *lostAtElem,AT_FLOAT *lostAtCoord,
                   bool updateRin) {
 
@@ -371,28 +367,14 @@ void Lattice::run(uint64_t nbTurn,uint32_t nbParticles,AT_FLOAT *rin,AT_FLOAT *r
   if(lostAtElem) gpu->allocDevice(&gpuLostAtElem, nbParticles * sizeof(uint32_t));
   if(lostAtCoord) gpu->allocDevice(&gpuLostAtCoord, nbParticles * 6 * sizeof(AT_FLOAT));
 
-  // Starting element (lattice rotation)
-  void *gpuOffsets = nullptr;
-  uint32_t offsetStride = 0;
-  if(nbElemOffset) {
-    gpu->allocDevice(&gpuOffsets, nbElemOffset * sizeof(int32_t));
-    gpu->hostToDevice(gpuOffsets, elemOffsets, nbElemOffset * sizeof(int32_t));
-    // nbParticles % nbElemOffset must be 0
-    // offsetStride (nbParticles/nbElemOffset) should be a multiple of 64
-    offsetStride = nbParticles / nbElemOffset;
-  }
-
   // Call GPU
   gpu->resetArg();
-  uint32_t startElem;
-  uint32_t nbElemToProcess = (uint32_t)elements.size();
+  uint32_t nbElemToProcess = endElem - startElem;
   uint32_t turn;
   gpu->addArg(sizeof(void *),&gpuRingParams);       // Global ring params
   gpu->addArg(sizeof(void *),&gpuRing);             // The lattice
   gpu->addArg(sizeof(uint32_t),&startElem);         // Process from
   gpu->addArg(sizeof(uint32_t),&nbElemToProcess);   // Number of element to process
-  gpu->addArg(sizeof(void *),&gpuOffsets);          // Tracking start offsets
-  gpu->addArg(sizeof(uint32_t),&offsetStride);      // Number of particle which starts at elem specified in gpuOffsets
   gpu->addArg(sizeof(uint32_t),&nbParticles);       // Total number of particle to track
   gpu->addArg(sizeof(void *),&gpuRin);              // Input 6D coordinates
   gpu->addArg(sizeof(void *),&gpuRout);             // Output 6D coordinates
@@ -405,8 +387,9 @@ void Lattice::run(uint64_t nbTurn,uint32_t nbParticles,AT_FLOAT *rin,AT_FLOAT *r
 
   // Turn loop
   for(turn=0;turn<nbTurn;turn++) {
-    startElem = 0;
     gpu->run(nbParticles); // 1 particle per thread
+    startElem = 0;
+    nbElemToProcess = (uint32_t)elements.size();
   }
 
   // Get back data
@@ -441,7 +424,6 @@ void Lattice::run(uint64_t nbTurn,uint32_t nbParticles,AT_FLOAT *rin,AT_FLOAT *r
   gpu->freeDevice(gpuRingParams);
   if( gpuLostAtElem ) gpu->freeDevice(gpuLostAtElem);
   if( gpuLostAtCoord ) gpu->freeDevice(gpuLostAtCoord);
-  if( gpuOffsets ) gpu->freeDevice(gpuOffsets);
   delete[] expandedRefPts;
   delete[] lost;
 
diff --git a/atgpu/Lattice.h b/atgpu/Lattice.h
index e3e2b5bed..e5785854c 100644
--- a/atgpu/Lattice.h
+++ b/atgpu/Lattice.h
@@ -27,7 +27,7 @@ class Lattice {
   void generateGPUKernel();
   // Run the simulation
   void run(uint64_t nbTurn,uint32_t nbParticles,AT_FLOAT *rin,AT_FLOAT *rout,uint32_t nbRef,uint32_t *refPts,
-           uint32_t nbElemOffset,uint32_t *elemOffsets,uint32_t *lostAtTurn,uint32_t *lostAtElem,AT_FLOAT *lostAtCoord,
+           uint32_t startElem,uint32_t endElem,uint32_t *lostAtTurn,uint32_t *lostAtElem,AT_FLOAT *lostAtCoord,
            bool updateRin);
   // Return handle to the GPU context
   GPUContext *getGPUContext();
diff --git a/atgpu/PyInterface.cpp b/atgpu/PyInterface.cpp
index 539b78b40..1bb62e1e6 100755
--- a/atgpu/PyInterface.cpp
+++ b/atgpu/PyInterface.cpp
@@ -41,18 +41,20 @@ static PyMethodDef AtGPUMethods[] = {
                           "    reuse:   if True, use previously cached description of the lattice.\n"
                           "    losses:  if True, process losses\n"
                           "    gpu_pool:  List of GPU id to use\n"
-                          "    tracking_starts: numpy array of indices of elements where tracking should start.\n"
-                          "       len(tracking_start) must divide the number of particle and it gives the stride size.\n"
-                          "       The i-th particle of rin starts at elem tracking_start[i/stride].\n"
-                          "       The behavior is similar to lattice.rotate(tracking_starts[i/stride]).\n"
-                          "       The stride size should be multiple of 64 for best performance.\n"
                           "    integrator: Type of integrator to use.\n"
                           "       1: Euler 1st order, 1 drift/1 kick per step.\n"
                           "       2: Mclachlan 2nd order, 2 drift/2 kicks per step.\n"
                           "       3: Ruth 3rd order, 3 drifts/3 kicks per step.\n"
                           "       4: Forest/Ruth 4th order, 4 drifts/3 kicks per step (Default).\n"
                           "       5: Optimal 4th order from R. Mclachlan, 4 drifts/4 kicks per step.\n"
-                          "       6: Yoshida 6th order, 8 drifts/7 kicks per step.\n\n"
+                          "       6: Yoshida 6th order, 8 drifts/7 kicks per step.\n"
+                          "    start_elem: Start tracking at the specified element. The\n"
+                          "       behavior is different from :py:func:`..lattice.rotate`, refpts are\n"
+                          "       still indexed from the begininig of the ring and s_coord starts at\n"
+                          "       the beginning of the ring. Default 0.\n"
+                          "    end_elem: End tracking at the specified element (exclusive).\n"
+                          "       When end_elem is different from the number of lattice elements,\n"
+                          "       nturns=1 is required. Default number of elements.\n\n"
                           "Returns:\n"
                           "    rout:    6 x n_particles x n_refpts x n_turns Fortran-ordered numpy array\n"
                           "         of particle coordinates\n\n"
@@ -138,7 +140,8 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) {
                                  "energy", "particle", "keep_counter",
                                  "reuse","losses",
                                  "bunch_spos", "bunch_currents", "gpu_pool",
-                                 "tracking_starts","integrator","verbose",
+                                 "integrator","verbose",
+                                 "start_elem","end_elem",
                                  nullptr};
 
   NPY_TYPES floatType;
@@ -168,10 +171,12 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) {
   int counter=0;
   int losses=0;
   int integratorType=4;
+  int startElem=0;
+  int endElem=-1;
   double t0,t1;
 
   // Get input args
-  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!O!i|O!$iO!O!pppO!O!O!O!ip", const_cast<char **>(kwlist),
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!O!i|O!$iO!O!pppO!O!O!ipii", const_cast<char **>(kwlist),
                                    &PyList_Type, &lattice,
                                    &PyArray_Type, &rin,
                                    &num_turns,
@@ -185,9 +190,10 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) {
                                    &PyArray_Type, &bspos,
                                    &PyArray_Type, &bcurrents,
                                    &PyList_Type, &gpupool,
-                                   &PyArray_Type, &trackstarts,
                                    &integratorType,
-                                   &verbose
+                                   &verbose,
+                                   &startElem,
+                                   &endElem
                                    )) {
     return nullptr;
   }
@@ -292,6 +298,32 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) {
 
   }
 
+  int num_elements = gpuLattice->getNbElement();
+
+  // Check for partial turn tracking
+  if(endElem==-1) endElem = num_elements;
+  if(num_elements!=0) {
+    if(startElem<0 || startElem>=num_elements) {
+      char errMsg[64];
+      sprintf(errMsg,"start_elem out of range, %d not in [0..%d]",startElem,num_elements-1);
+      return PyErr_Format(PyExc_ValueError, errMsg);
+    }
+    if(endElem<0 || endElem>num_elements) {
+      char errMsg[64];
+      sprintf(errMsg,"end_elem out of range, %d not in [0..%d]",endElem,num_elements);
+      return PyErr_Format(PyExc_ValueError, errMsg);
+    }
+    if(startElem>endElem) {
+      return PyErr_Format(PyExc_ValueError, "end_elem must be greater that start_elem");
+    }
+    if((endElem!=num_elements) && (num_turns!=1)) {
+      return PyErr_Format(PyExc_ValueError, "partial turn tracking not ending at the end of the ring requires nturns=1");
+    }
+  } else {
+    startElem = 0;
+    endElem = 0;
+  }
+
   // Load lattice on the GPU
   try {
     uint64_t size = gpuLattice->fillGPUMemory();
@@ -309,7 +341,8 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) {
   try {
 
     if( verbose )
-      cout << "Tracking " << num_particles << " particles for " << num_turns << " turns on " << gpuLattice->getGPUContext()->name() << " #" << gpuId << endl;
+      cout << "Tracking " << num_particles << " particles for " << num_turns << " turns on " <<
+      gpuLattice->getGPUContext()->name() << " #" << gpuId << " Integrator: #" << integratorType << endl;
 
     npy_intp outdims[4] = {6,(npy_intp)(num_particles),num_refs,num_turns};
     PyObject *rout = PyArray_EMPTY(4, outdims, floatType, 1);
@@ -330,7 +363,7 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) {
       bool *xlostPtr = (bool *)PyArray_DATA((PyArrayObject *)xlost);
       AT_FLOAT *xlostcoordPtr = (AT_FLOAT *)PyArray_DATA((PyArrayObject *)xlostcoord);
 
-      gpuLattice->run(num_turns,num_particles,drin,drout,num_refs,ref_pts,num_starts,track_starts,
+      gpuLattice->run(num_turns,num_particles,drin,drout,num_refs,ref_pts,startElem,endElem,
                       xnturnPtr,xnelemPtr,xlostcoordPtr,true);
 
       // Format result for AT
@@ -355,7 +388,7 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) {
 
     } else {
 
-      gpuLattice->run(num_turns,num_particles,drin,drout,num_refs,ref_pts,num_starts,track_starts,
+      gpuLattice->run(num_turns,num_particles,drin,drout,num_refs,ref_pts,startElem,endElem,
                       nullptr,nullptr,nullptr,true);
       return rout;
 
diff --git a/atgpu/main.cpp b/atgpu/main.cpp
index 1e3a85e2b..0630df41f 100755
--- a/atgpu/main.cpp
+++ b/atgpu/main.cpp
@@ -134,7 +134,7 @@ void integratorTest(int gpu,string latticeName) {
       // Choose an arc close to 1mm where unexpected tune drift is observed when step size in too small (EBS lattice)
       AT_FLOAT *rin = createArc(0.001,M_PI/2.0,-M_PI/2.0,nbPart);
 
-      l->run(nbTurn, nbPart, rin, rout, nbRef, refs, 0, nullptr, nullptr, nullptr, nullptr, false);
+      l->run(nbTurn, nbPart, rin, rout, nbRef, refs, 0, l->getNbElement()-1, nullptr, nullptr, nullptr, false);
 
       double err = 0;
       double max = 0;
@@ -209,9 +209,6 @@ void performanceTest(int gpu,string latticeName) {
       uint32_t nbPart = nbX * nbY;
       uint32_t refs[] = {l->getNbElement()};
       uint32_t nbRef = sizeof(refs) / sizeof(uint32_t);
-      uint32_t starts[] = {0,100,200,300,400,500,600,700};
-      uint32_t nbStride = sizeof(starts) / sizeof(uint32_t);
-      //uint32_t nbStride = 0;
 
       uint64_t routSize = nbTurn * nbPart * nbRef * 6 * sizeof(AT_FLOAT);
       AT_FLOAT *rout = (AT_FLOAT *) malloc(routSize);
@@ -222,7 +219,7 @@ void performanceTest(int gpu,string latticeName) {
       AT_FLOAT *lostAtCoord = new AT_FLOAT[nbPart * 6];
 
       t0 = AbstractGPU::get_ticks();
-      l->run(nbTurn, nbPart, rin, rout, nbRef, refs, nbStride, starts, lostAtTurn, lostAtElem, lostAtCoord,false);
+      l->run(nbTurn, nbPart, rin, rout, nbRef, refs, 0, l->getNbElement()-1, lostAtTurn, lostAtElem, lostAtCoord,false);
       t1 = AbstractGPU::get_ticks();
 
       AT_FLOAT *P = &rin[114*6];
@@ -231,9 +228,8 @@ void performanceTest(int gpu,string latticeName) {
       cout << "Lost elem:" << lostAtElem[114] << endl;
       cout << "Lost coord:" << lostAtCoord[114*6] << "," << lostAtCoord[114*6+2] << endl;
 
-      uint32_t strideSize = nbPart / nbStride;
-      for(int stride=0; stride < nbStride; stride++) {
-        P = ROUTPTR(stride*strideSize, 0, nbTurn - 1);
+      for(int stride=0; stride < 8; stride++) {
+        P = ROUTPTR(stride*8, 0, nbTurn - 1);
         cout << "[" << stride << "] " << P[0] << " " << P[1] << " " << P[2] << " " << P[3] << " " << P[4] << " " << P[5] << endl;
       }
 
diff --git a/pyat/at.c b/pyat/at.c
index 8a7d673a2..778790acf 100644
--- a/pyat/at.c
+++ b/pyat/at.c
@@ -332,12 +332,14 @@ void set_current_fillpattern(PyArrayObject *bspos, PyArrayObject *bcurrents,
  *  - nturns: int number of turns to simulate
  *  - refpts: numpy uint32 array denoting elements at which to return state
  *  - reuse: whether to reuse the cached state of the ring
+ *  - start_elem: start tracking at element #start_elem
+ *  - end_elem: end tracking at element #end_elem (exclusive)
  */
 static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) {
     static char *kwlist[] = {"line","rin","nturns","refpts","turn",
                              "energy", "particle", "keep_counter",
                              "reuse","omp_num_threads","losses",
-                             "bunch_spos", "bunch_currents", NULL};
+                             "bunch_spos", "bunch_currents", "start_elem", "end_elem", NULL};
     static double lattice_length = 0.0;
     static int last_turn = 0;
     static int valid = 0;
@@ -363,6 +365,8 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) {
     npy_uint32 omp_num_threads=0;
     npy_uint32 num_particles, np6;
     npy_uint32 elem_index;
+    npy_uint32 start_elem = 0;
+    npy_uint32 end_elem = 0;
     npy_uint32 *refpts = NULL;
     npy_uint32 nextref;
     unsigned int nextrefindex;
@@ -387,12 +391,13 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) {
     bspos=NULL;
     bcurrents=NULL;
     
-    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!O!i|O!$iO!O!ppIpO!O!", kwlist,
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!O!i|O!$iO!O!ppIpO!O!ii", kwlist,
         &PyList_Type, &lattice, &PyArray_Type, &rin, &num_turns,
         &PyArray_Type, &refs, &counter,
         &PyFloat_Type ,&energy, particle_type, &particle,
         &keep_counter, &keep_lattice, &omp_num_threads, &losses,
-        &PyArray_Type, &bspos, &PyArray_Type, &bcurrents)) {
+        &PyArray_Type, &bspos, &PyArray_Type, &bcurrents,
+        &start_elem,&end_elem)) {
         return NULL;
     }
     if (PyArray_DIM(rin,0) != 6) {
@@ -411,61 +416,14 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) {
     param.rest_energy=0.0;
     param.charge=-1.0;
     param.num_turns=num_turns;
-    param.bdiff = NULL;
-    
+    param.bdiff = NULL;    
     if (keep_counter)
         param.nturn = last_turn;
     else
         param.nturn = counter;
 
-    set_energy_particle(lattice, energy, particle, &param);
     set_current_fillpattern(bspos, bcurrents, &param);
 
-    num_particles = (PyArray_SIZE(rin)/6);
-    np6 = num_particles*6;
-    drin = PyArray_DATA(rin);
-
-    if (refs) {
-        if (PyArray_TYPE(refs) != NPY_UINT32) {
-            return PyErr_Format(PyExc_ValueError, "refpts is not a uint32 array");
-        }
-        refpts = PyArray_DATA(refs);
-        num_refpts = PyArray_SIZE(refs);
-    }
-    else {
-        refpts = NULL;
-        num_refpts = 0;
-    }
-    outdims[0] = 6;
-    outdims[1] = num_particles;
-    outdims[2] = num_refpts;
-    outdims[3] = num_turns;
-    rout = PyArray_EMPTY(4, outdims, NPY_DOUBLE, 1);
-    drout = PyArray_DATA((PyArrayObject *)rout);
-
-    if(losses){
-        pdims[0]= num_particles;
-        lxdims[0]= 6;
-        lxdims[1]= num_particles;
-        xnturn = PyArray_EMPTY(1, pdims, NPY_UINT32, 1);
-        xnelem = PyArray_EMPTY(1, pdims, NPY_UINT32, 1);
-        xlost = PyArray_EMPTY(1, pdims, NPY_BOOL, 1);
-        xlostcoord = PyArray_EMPTY(2, lxdims, NPY_DOUBLE, 1);
-        ixnturn = PyArray_DATA((PyArrayObject *)xnturn);
-        ixnelem = PyArray_DATA((PyArrayObject *)xnelem);
-        bxlost = PyArray_DATA((PyArrayObject *)xlost);
-        dxlostcoord = PyArray_DATA((PyArrayObject *)xlostcoord);
-        unsigned int i;
-        static double r0[6];
-        for(i=0;i<num_particles;i++){
-            bxlost[i]=0;
-            ixnturn[i]=0;
-            ixnelem[i]=0;
-            memcpy(dxlostcoord+6*i,r0,6*sizeof(double));
-        }
-    }
-
-
     #ifdef _OPENMP
     if ((omp_num_threads > 0) && (num_particles > OMP_PARTICLE_THRESHOLD)) {
         unsigned int nthreads = omp_get_num_procs();
@@ -521,10 +479,10 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) {
             PyObject *el = PyList_GET_ITEM(lattice, elem_index);
             PyObject *PyPassMethod = PyObject_GetAttrString(el, "PassMethod");
             double length;
-            if (!PyPassMethod) return print_error(elem_index, rout);    /* No PassMethod: AttributeError */
+            if (!PyPassMethod) return NULL;    /* No PassMethod: AttributeError */
             LibraryListPtr = get_track_function(PyUnicode_AsUTF8(PyPassMethod));
             Py_DECREF(PyPassMethod);
-            if (!LibraryListPtr) return print_error(elem_index, rout);  /* No trackFunction for the given PassMethod: RuntimeError */
+            if (!LibraryListPtr) return NULL;  /* No trackFunction for the given PassMethod: RuntimeError */
             pylength = PyObject_GetAttrString(el, "Length");
             if (pylength) {
                 length = PyFloat_AsDouble(pylength);
@@ -544,18 +502,88 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) {
         valid = 0;
     }
 
+    /* Check for partial turn tracking */
+    if(end_elem==0) end_elem = num_elements;
+    if(num_elements!=0) {
+        if(start_elem<0 || start_elem>=num_elements) {
+            char errMsg[64];
+            sprintf(errMsg,"start_elem out of range, %d not in [0..%d]",start_elem,num_elements-1);
+            return PyErr_Format(PyExc_ValueError, errMsg);
+        }
+        if(end_elem<0 || end_elem>num_elements) {
+            char errMsg[64];
+            sprintf(errMsg,"end_elem out of range, %d not in [0..%d]",start_elem,num_elements);
+            return PyErr_Format(PyExc_ValueError, errMsg);
+        }
+        if(start_elem>end_elem) {
+            return PyErr_Format(PyExc_ValueError, "end_elem must be greater that start_elem");
+        }
+        if((end_elem!=num_elements) && (num_turns!=1)) {
+            return PyErr_Format(PyExc_ValueError, "partial turn tracking not ending at the end of the ring requires nturns=1");
+        }
+    } else {
+        start_elem = 0;
+        end_elem = 0;
+    }
+
+    /* Particle energy */
     param.RingLength = lattice_length;
     if (param.rest_energy == 0.0) {
         param.T0 = param.RingLength/C0;
-    }
-    else {
+    } else {
         double gamma0 = param.energy/param.rest_energy;
         double betagamma0 = sqrt(gamma0*gamma0 - 1.0);
         double beta0 = betagamma0/gamma0;
         param.T0 = param.RingLength/beta0/C0;
     }
+    set_energy_particle(lattice, energy, particle, &param);
+
+    /* Memory allocation for outputs */
+    num_particles = (PyArray_SIZE(rin)/6);
+    np6 = num_particles*6;
+    drin = PyArray_DATA(rin);
+
+    if (refs) {
+        if (PyArray_TYPE(refs) != NPY_UINT32) {
+            return PyErr_Format(PyExc_ValueError, "refpts is not a uint32 array");
+        }
+        refpts = PyArray_DATA(refs);
+        num_refpts = PyArray_SIZE(refs);
+    } else {
+        refpts = NULL;
+        num_refpts = 0;
+    }
+    outdims[0] = 6;
+    outdims[1] = num_particles;
+    outdims[2] = num_refpts;
+    outdims[3] = num_turns;
+    rout = PyArray_EMPTY(4, outdims, NPY_DOUBLE, 1);
+    drout = PyArray_DATA((PyArrayObject *)rout);
+
+    if(losses){
+        pdims[0]= num_particles;
+        lxdims[0]= 6;
+        lxdims[1]= num_particles;
+        xnturn = PyArray_EMPTY(1, pdims, NPY_UINT32, 1);
+        xnelem = PyArray_EMPTY(1, pdims, NPY_UINT32, 1);
+        xlost = PyArray_EMPTY(1, pdims, NPY_BOOL, 1);
+        xlostcoord = PyArray_EMPTY(2, lxdims, NPY_DOUBLE, 1);
+        ixnturn = PyArray_DATA((PyArrayObject *)xnturn);
+        ixnelem = PyArray_DATA((PyArrayObject *)xnelem);
+        bxlost = PyArray_DATA((PyArrayObject *)xlost);
+        dxlostcoord = PyArray_DATA((PyArrayObject *)xlostcoord);
+        unsigned int i;
+        static double r0[6];
+        for(i=0;i<num_particles;i++){
+            bxlost[i]=0;
+            ixnturn[i]=0;
+            ixnelem[i]=0;
+            memcpy(dxlostcoord+6*i,r0,6*sizeof(double));
+        }
+    }
 
     for (turn = 0; turn < num_turns; turn++) {
+
         PyObject **element = element_list;
         double *elem_length = elemlength_list;
         track_function *integrator = integrator_list;
@@ -564,10 +592,21 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) {
         struct elem **elemdata = elemdata_list;
         double s_coord = 0.0;
 
-      /*PySys_WriteStdout("turn: %i\n", param.nturn);*/
+        if(start_elem!=0) {
+            /* Update pointers and s_coord when start_elem is not 0 */
+            for(elem_index = 0;elem_index<start_elem;elem_index++ ) {
+                s_coord += *elem_length++;
+                element++;
+                integrator++;
+                pyintegrator++;
+                elemdata++;
+            }
+        }
+
+        /*PySys_WriteStdout("turn: %i\n", param.nturn);*/
         nextrefindex = 0;
         nextref= (nextrefindex<num_refpts) ? refpts[nextrefindex++] : INT_MAX;
-        for (elem_index = 0; elem_index < num_elements; elem_index++) {
+        for (elem_index = start_elem; elem_index < end_elem; elem_index++) {
             param.s_coord = s_coord;
             if (elem_index == nextref) {
                 memcpy(drout, drin, np6*sizeof(double));
@@ -593,7 +632,6 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) {
             integrator++;
             pyintegrator++;
             elemdata++;
-            kwargs++;
         }
         /* the last element in the ring */
         if (num_elements == nextref) {
@@ -601,6 +639,7 @@ static PyObject *at_atpass(PyObject *self, PyObject *args, PyObject *kwargs) {
             drout += np6; /*  shift the location to write to in the output array */
         }
         param.nturn++;
+        start_elem = 0;
     }
     valid = 1;      /* Tracking successful: the lattice can be reused */
     last_turn = param.nturn;  /* Store turn number in a static variable */
@@ -833,7 +872,14 @@ static PyMethodDef AtMethods[] = {
               "    particle (Optional[Particle]):  circulating particle\n"
               "    reuse:   if True, use previously cached description of the lattice.\n"
               "    omp_num_threads: number of OpenMP threads (default 0: automatic)\n"
-              "    losses:  if True, process losses\n\n"
+              "    losses:  if True, process losses\n"
+              "    start_elem: Start tracking at the specified element. The\n"
+              "       behavior is different from :py:func:`..lattice.rotate`, refpts are\n"
+              "       still indexed from the begininig of the ring and s_coord starts at\n"
+              "       the beginning of the ring. Default 0.\n"
+              "    end_elem: End tracking at the specified element (exclusive).\n"
+              "       When end_elem is different from the number of lattice elements,\n"
+              "       nturns=1 is required. Default number of elements.\n\n"
               "Returns:\n"
               "    rout:    6 x n_particles x n_refpts x n_turns Fortran-ordered numpy array\n"
               "         of particle coordinates\n\n"
diff --git a/pyat/at/acceptance/acceptance.py b/pyat/at/acceptance/acceptance.py
index 773b6a255..25f228bc6 100644
--- a/pyat/at/acceptance/acceptance.py
+++ b/pyat/at/acceptance/acceptance.py
@@ -41,6 +41,7 @@ def get_acceptance(
     divider: int = 2,
     shift_zero: float = 1.0e-6,
     start_method: str | None = None,
+    **kwargs
 ):
     # noinspection PyUnresolvedReferences
     r"""Computes the acceptance at ``repfts`` observation points
@@ -116,7 +117,6 @@ def get_acceptance(
          do most of the work in a single function call and allows for
          full parallelization.
     """
-    kwargs = {}
     if start_method is not None:
         kwargs["start_method"] = start_method
 
@@ -198,6 +198,7 @@ def get_1d_acceptance(
     divider: int | None = 2,
     shift_zero: float | None = 1.0e-6,
     start_method: str | None = None,
+    **kwargs
 ):
     r"""Computes the 1D acceptance at ``refpts`` observation points
 
@@ -289,6 +290,7 @@ def get_1d_acceptance(
         divider=divider,
         shift_zero=shift_zero,
         offset=offset,
+        **kwargs
     )
     return np.squeeze(b), s, g
 
diff --git a/pyat/at/acceptance/boundary.py b/pyat/at/acceptance/boundary.py
index a2e7c3fd7..b2b9d5f55 100644
--- a/pyat/at/acceptance/boundary.py
+++ b/pyat/at/acceptance/boundary.py
@@ -340,10 +340,13 @@ def grid_boundary_search(
     t0 = time.time()
     allparts = []
     grids = []
-    offsets = []
 
-    for i, obs, orbit in zip(numpy.arange(len(obspt)), obspt, offset):
-        orbit, newring = set_ring_orbit(ring, dp, obs, orbit)
+    if offset is None:
+        _, offsets = ring.find_orbit(dp=dp, refpts=obspt)
+    else:
+        offsets = offset
+
+    for i, obs, orbit in zip(numpy.arange(len(obspt)), obspt, offsets):
         parts, grid = get_parts(config, orbit)
         obs = 0 if obs is None else obs
         dpp = 0.0 if dp is None else dp
@@ -354,12 +357,17 @@ def grid_boundary_search(
                     ring[obs].FamName, obs, orbit, dpp, i + 1, len(obspt)
                 )
             )
-        newring[: len(ring) - obs].track(
-            parts, use_mp=use_mp, in_place=True, refpts=None, **kwargs
+        ring.track(
+            parts,
+            use_mp=use_mp,
+            in_place=True,
+            refpts=None,
+            start_elem=obs,
+            keep_lattice=True,
+            **kwargs,
         )
         allparts.append(parts)
         grids.append(grid)
-        offsets.append(orbit)
     if verbose:
         print("Starting the multi-turn tracking...")
     allparts = numpy.concatenate(allparts, axis=1)
@@ -399,7 +407,6 @@ def recursive_boundary_search(
     def search_boundary(
         planesi, angles, rtol, rsteps, nturns, offset, use_mp, **kwargs
     ):
-
         ftol = min(rtol / rsteps)
         cs = numpy.squeeze([numpy.cos(angles), numpy.sin(angles)])
         cs = numpy.around(cs, decimals=9)
@@ -521,14 +528,14 @@ def boundary_search(
                 numpy.shape(offset), numpy.shape(obspt)
             )
             raise AtError(msg)
-    else:
-        offset = [None for _ in rp]
 
     divider = kwargs.pop("divider", 2)
     if grid_mode is GridMode.RECURSIVE:
         boundary = []
         survived = []
         grid = []
+        if offset is None:
+            offset = [None for _ in rp]
         for r, o in zip(rp, offset):
             b, s, g = recursive_boundary_search(
                 ring,
diff --git a/pyat/at/tracking/atpass.pyi b/pyat/at/tracking/atpass.pyi
index 8c882b8ca..081e99f24 100644
--- a/pyat/at/tracking/atpass.pyi
+++ b/pyat/at/tracking/atpass.pyi
@@ -22,6 +22,8 @@ def atpass(
     losses: bool = False,
     bunch_spos=None,
     bunch_current=None,
+    start_elem: int | None = None,
+    end_elem: int | None = None,
 ): ...
 def elempass(
     element: Element,
diff --git a/pyat/at/tracking/gpu.pyi b/pyat/at/tracking/gpu.pyi
index c17281dba..b8948776b 100644
--- a/pyat/at/tracking/gpu.pyi
+++ b/pyat/at/tracking/gpu.pyi
@@ -21,7 +21,8 @@ def gpupass(
     bunch_spos=None,
     bunch_current=None,
     gpu_pool: list[int] | None = None,
-    tracking_starts=None,
     integrator=4,
-    verbose: bool = False
+    verbose: bool = False,
+    start_elem: int | None = None,
+    end_elem: int | None = None
 ): ...
diff --git a/pyat/at/tracking/track.py b/pyat/at/tracking/track.py
index 55c249a33..d39d2c2c9 100755
--- a/pyat/at/tracking/track.py
+++ b/pyat/at/tracking/track.py
@@ -233,6 +233,13 @@ def lattice_track(
           calculation or to solve Runtime Errors, however it is considered
           unsafe. Used only when *use_mp* is :py:obj:`True`. It can be globally
           set using the variable *at.lattice.DConstant.patpass_startmethod*
+        start_elem (int):       Start tracking at the specified element. The
+          behavior is different from :py:func:`..lattice.rotate`, refpts are
+          still indexed from the begininig of the ring and s_coord starts at
+          the beginning of the ring. Default 0.
+        end_elem (int):         End tracking at the specified element (exclusive).
+          When end_elem is different from the number of lattice elements,
+          :pycode:`nturns=1` is required. Default number of elements.
 
     The following keyword arguments overload the lattice values
 
diff --git a/pyat/test/test_atpass.py b/pyat/test/test_atpass.py
index 719924dbc..cff680699 100644
--- a/pyat/test/test_atpass.py
+++ b/pyat/test/test_atpass.py
@@ -153,3 +153,32 @@ def test_1d_particle():
     rout_expected = numpy.array([1e-6, 1e-6, 0, 0, 0, 5e-13])
     # rin is changed in place
     numpy.testing.assert_equal(rin, rout_expected)
+
+def test_start_elem(hmba_lattice):
+    hmba_lattice = hmba_lattice.radiation_off(copy=True)
+    rin = numpy.array([0,1e-6,0,0,0,0])
+    rout = atpass(hmba_lattice, rin, 1, start_elem=100)
+    assert rout.shape == (6, 1, 0, 1)
+    expected = numpy.array([ 3.2e-6, -1.1e-6,  0,  0,0,  8.4e-8])
+    numpy.testing.assert_allclose(rin, expected, atol=1e-7)
+
+def test_end_elem(hmba_lattice):
+    hmba_lattice = hmba_lattice.radiation_off(copy=True)
+    rin = numpy.array([0,1e-6,0,0,0,0])
+    rout = atpass(hmba_lattice, rin, 1, start_elem=100, end_elem=110)
+    assert rout.shape == (6, 1, 0, 1)
+    expected = numpy.array([2.6e-6, 1.34e-6, 0, 0,0, 2.4e-8])
+    numpy.testing.assert_allclose(rin, expected, atol=1e-7)
+
+def test_start_end_elem_errors(hmba_lattice):
+    hmba_lattice = hmba_lattice.radiation_off(copy=True)
+    rin = numpy.array([0,1e-6,0,0,0,0])
+    with pytest.raises(ValueError):
+        rout = atpass(hmba_lattice, rin, 1, start_elem=200)
+    with pytest.raises(ValueError):
+        rout = atpass(hmba_lattice, rin, 1, end_elem=200)
+    with pytest.raises(ValueError):
+        rout = atpass(hmba_lattice, rin, 1, start_elem=100, end_elem=10)
+    with pytest.raises(ValueError):
+        rout = atpass(hmba_lattice, rin, 2, end_elem=10)
+