diff --git a/.circleci/config.yml b/.circleci/config.yml
index cd2963d..0121615 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,7 +1,7 @@
 version: 2.1
 
 orbs:
-  python: circleci/python@0.2.1
+  python: circleci/python@1.5.0
 
 jobs:
   unit-tests:
@@ -11,12 +11,12 @@ jobs:
       - run:
           name: setup
           command: |
-            virtualenv -p python3.7 .venv
+            virtualenv -p python3.8 .venv
             source .venv/bin/activate
             # pip install -q torch==1.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
             # temporary workaround for https://github.com/pytorch/pytorch/issues/49560
-            wget https://download.pytorch.org/whl/cpu/torch-1.6.0%2Bcpu-cp37-cp37m-linux_x86_64.whl
-            pip install -q torch-1.6.0+cpu-cp37-cp37m-linux_x86_64.whl
+            wget https://download.pytorch.org/whl/cpu/torch-1.6.0%2Bcpu-cp38-cp38-linux_x86_64.whl
+            pip install -q torch-1.6.0+cpu-cp38-cp38-linux_x86_64.whl
             pip install -q .
             pip install -q -r requirements-test.txt
       - run:
@@ -33,13 +33,14 @@ jobs:
       - run:
           name: setup
           command: |
-            virtualenv -p python3.7 .venv
+            virtualenv -p python3.8 .venv
             source .venv/bin/activate
             # pip install -q torch==1.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
             # temporary workaround for https://github.com/pytorch/pytorch/issues/49560
-            wget https://download.pytorch.org/whl/cpu/torch-1.6.0%2Bcpu-cp37-cp37m-linux_x86_64.whl
-            pip install -q torch-1.6.0+cpu-cp37-cp37m-linux_x86_64.whl
+            wget https://download.pytorch.org/whl/cpu/torch-1.6.0%2Bcpu-cp38-cp38-linux_x86_64.whl
+            pip install -q torch-1.6.0+cpu-cp38-cp38-linux_x86_64.whl
             pip install -q .
+            sudo apt-get update -y
             sudo apt-get -y install cmake
       - run:
           name: Test
diff --git a/sru/modules.py b/sru/modules.py
index 3b3b6c0..d1eea37 100644
--- a/sru/modules.py
+++ b/sru/modules.py
@@ -1,3 +1,11 @@
+"""
+This module implements the core classes SRU and SRUCell.
+
+Implementation note: the import of sru.ops is postponed until the first SRUCell is
+instantiated, so that CUDA initialization takes place in the process that will actually
+run the model. See the class method init_elementwise_recurrence_funcs.
+"""
+
 import copy
 import warnings
 import math
@@ -8,10 +16,6 @@
 from torch import Tensor
 from torch.nn.utils.rnn import PackedSequence
 
-from sru.ops import (elementwise_recurrence_inference,
-                     elementwise_recurrence_gpu,
-                     elementwise_recurrence_naive)
-
 
 class SRUCell(nn.Module):
     """
@@ -27,6 +31,11 @@ class SRUCell(nn.Module):
     scale_x: Tensor
     weight_proj: Optional[Tensor]
 
+    initialized = False
+    elementwise_recurrence_inference = None
+    elementwise_recurrence_gpu = None
+    elementwise_recurrence_naive = None
+
     def __init__(self,
                  input_size: int,
                  hidden_size: int,
@@ -160,6 +169,24 @@ def __init__(self,
             self.layer_norm = nn.LayerNorm(self.input_size)
 
         self.reset_parameters()
+        SRUCell.init_elementwise_recurrence_funcs()
+
+    @classmethod
+    def init_elementwise_recurrence_funcs(cls):
+        """
+        Initializes the elementwise recurrence functions. This is postponed until the
+        first SRUCell instance is created, to avoid eager CUDA initialization and to
+        ensure it takes place in the process running the model.
+        """
+        if cls.initialized:
+            return
+        from sru.ops import (elementwise_recurrence_inference,
+                             elementwise_recurrence_gpu,
+                             elementwise_recurrence_naive)
+        cls.elementwise_recurrence_inference = elementwise_recurrence_inference
+        cls.elementwise_recurrence_gpu = elementwise_recurrence_gpu
+        cls.elementwise_recurrence_naive = elementwise_recurrence_naive
+        cls.initialized = True
 
     def reset_parameters(self):
         """Properly initialize the weights of SRU, following the same
@@ -295,27 +322,27 @@
         """
         if not torch.jit.is_scripting():
             if self.bias.is_cuda:
-                return elementwise_recurrence_gpu(U, residual, V, self.bias, c0,
-                                                  self.activation_type,
-                                                  self.hidden_size,
-                                                  self.bidirectional,
-                                                  self.has_skip_term,
-                                                  scale_val, mask_c, mask_pad,
-                                                  self.amp_recurrence_fp16)
+                return SRUCell.elementwise_recurrence_gpu(U, residual, V, self.bias, c0,
+                                                          self.activation_type,
+                                                          self.hidden_size,
+                                                          self.bidirectional,
+                                                          self.has_skip_term,
+                                                          scale_val, mask_c, mask_pad,
+                                                          self.amp_recurrence_fp16)
             else:
-                return elementwise_recurrence_naive(U, residual, V, self.bias, c0,
-                                                    self.activation_type,
-                                                    self.hidden_size,
-                                                    self.bidirectional,
-                                                    self.has_skip_term,
-                                                    scale_val, mask_c, mask_pad)
+                return SRUCell.elementwise_recurrence_naive(U, residual, V, self.bias, c0,
+                                                            self.activation_type,
+                                                            self.hidden_size,
+                                                            self.bidirectional,
+                                                            self.has_skip_term,
+                                                            scale_val, mask_c, mask_pad)
         else:
-            return elementwise_recurrence_inference(U, residual, V, self.bias, c0,
-                                                    self.activation_type,
-                                                    self.hidden_size,
-                                                    self.bidirectional,
-                                                    self.has_skip_term,
-                                                    scale_val, mask_c, mask_pad)
+            return SRUCell.elementwise_recurrence_inference(U, residual, V, self.bias, c0,
+                                                            self.activation_type,
+                                                            self.hidden_size,
+                                                            self.bidirectional,
+                                                            self.has_skip_term,
+                                                            scale_val, mask_c, mask_pad)
 
 
     def compute_UV(self,
diff --git a/sru/version.py b/sru/version.py
index f0e5e1e..bcd1100 100644
--- a/sru/version.py
+++ b/sru/version.py
@@ -1 +1 @@
-__version__ = '2.6.0'
+__version__ = '2.7.0-rc1'
diff --git a/test/test_1.py b/test/test_1.py
new file mode 100644
index 0000000..b6482df
--- /dev/null
+++ b/test/test_1.py
@@ -0,0 +1,8 @@
+import torch
+
+
+# Run at the beginning of the test suite, before any other test has used SRUCell
+def test_no_eager_cuda_init():
+    # Note: the test is expected to pass both with and without a GPU available
+    import sru
+    assert not torch.cuda.is_initialized()
\ No newline at end of file