diff --git a/README.md b/README.md
index 47ac0bd..bfb389d 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,8 @@
 # HFCXX
 
 [![Build](https://github.com/ifilot/hfcxx/actions/workflows/build.yml/badge.svg)](https://github.com/ifilot/hfcxx/actions/workflows/build.yml)
+![Version](https://img.shields.io/github/v/tag/ifilot/hfcxx?label=version)
+[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
 
 ## Overview
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 85562be..37227ce 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -20,7 +20,7 @@
 #**************************************************************************/
 
 # set minimum cmake requirements
-cmake_minimum_required(VERSION 2.8.12)
+cmake_minimum_required(VERSION 3.5)
 project (hfcxx)
 
 # add custom directory to look for .cmake files
diff --git a/src/hf.cpp b/src/hf.cpp
index 9ebd62e..b0d1ca9 100644
--- a/src/hf.cpp
+++ b/src/hf.cpp
@@ -65,7 +65,7 @@ void HF::molecule(const Molecule &moll) {
     for(unsigned int i=0; i<V.size(); i++) {
         V[i].resize(nrorbs,nrorbs);
     }
-    TE.resize(teindex(nrorbs,nrorbs,nrorbs,nrorbs)+1, -1.0);
+    TE.resize(pow(nrorbs,4)+1, -1.0);
 
     if(debug) {
         std::cout << std::endl;
@@ -149,29 +149,61 @@ void HF::setup() {
         std::cout << "Constructing TE matrix... (" << teindex(nrorbs-1,nrorbs-1,nrorbs-1,nrorbs-1)+1 << ") " << std::endl;
     }
 
-    // build a vector of two-electron integral jobs to evaluate
-    double tecnt=0;
-    std::vector<std::array<unsigned int,5>> te_jobs;
-    for(unsigned int i=0; i<nrorbs; i++) {
-        for(unsigned int j=0; j<=i; j++) {
-            unsigned int ij = i*(i+1)/2 + j;
-            for(unsigned int k=0; k<nrorbs; k++) {
-                for(unsigned int l=0; l<=k; l++) {
-                    unsigned int kl = k * (k+1)/2 + l;
+    // calculate all two-electron integrals
+    const size_t sz = orbitals.size();
+    const size_t tecnt = teindex(sz-1,sz-1,sz-1,sz-1) + 1;
+    std::vector<double> tedouble(tecnt, -1.0);
+
+    // it is more efficient to first 'unroll' the fourfold nested loop
+    // into a single vector of jobs to execute
+    std::vector<std::array<size_t, 5>> jobs;
+    for(size_t i=0; i<sz; i++) {
+        for(size_t j=0; j<sz; j++) {
+            size_t ij = i*(i+1)/2 + j;
+            for(size_t k=0; k<sz; k++) {
+                for(size_t l=0; l<sz; l++) {
+                    size_t kl = k * (k+1)/2 + l;
                     if(ij <= kl) {
-                        unsigned int index = teindex(i,j,k,l);
-                        te_jobs.push_back({index,i,j,k,l});
-                        tecnt++;
+                        const size_t idx = teindex(i,j,k,l);
+
+                        if(idx >= tedouble.size()) {
+                            throw std::runtime_error("Process tried to access illegal array position");
+                        }
+
+                        if(tedouble[idx] < 0.0) {
+                            tedouble[idx] = 1.0;
+                            jobs.emplace_back(std::array<size_t,5>({idx, i, j, k, l}));
+                        }
                     }
                 }
             }
         }
     }
 
-    // perform the two-electron integral calculation
+    // evaluate jobs
     #pragma omp parallel for schedule(dynamic)
-    for(const auto& te_job : te_jobs) {
-        TE[te_job[0]] = cgf_repulsion(orbitals[te_job[1]],orbitals[te_job[2]],orbitals[te_job[3]],orbitals[te_job[4]]);
+    for(int s=0; s<(int)jobs.size(); s++) {  // have to use signed int for MSVC OpenMP here
+        const size_t idx = jobs[s][0];
+        const size_t i = jobs[s][1];
+        const size_t j = jobs[s][2];
+        const size_t k = jobs[s][3];
+        const size_t l = jobs[s][4];
+        tedouble[idx] = cgf_repulsion(orbitals[i], orbitals[j], orbitals[k], orbitals[l]);
+    }
+
+    const size_t sz2 = sz * sz;
+    const size_t sz3 = sz2 * sz;
+
+    // reorganize everyting into a tensor object
+    for(size_t i=0; i<sz; i++) {
+        for(size_t j=0; j<sz; j++) {
+            for(size_t k=0; k<sz; k++) {
+                for(size_t l=0; l<sz; l++) {
+                    const size_t idx = teindex(i,j,k,l);
+                    TE[i*sz3 + j*sz2 + k*sz + l] = tedouble[idx];
+                }
+            }
+        }
     }
 
     if(debug) {
@@ -195,23 +227,24 @@ void HF::setup() {
 }
 
 void HF::step() {
-cntstep++;
+    cntstep++;
 
-unsigned int index1 = 0;
-unsigned int index2 = 0;
+    const size_t sz = nrorbs;
+    const size_t sz2 = sz * sz;
+    const size_t sz3 = sz2 * sz;
 
-    for(unsigned int i=0; i<nrorbs; i++) {
-    for(unsigned int j=0; j<nrorbs; j++) {
+    for(unsigned int i=0; i<sz; i++) {
+        for(unsigned int j=0; j<sz; j++) {
             G[i][j] = 0.; /* reset G matrix */
-      for(unsigned int k=0; k<nrorbs; k++) {
-        for(unsigned int l=0; l<nrorbs; l++) {
-                    index1 = teindex(i,j,l,k);
-          index2 = teindex(i,k,l,j);
-          G[i][j] += P[k][l] * (TE[index1] - 0.5 * TE[index2]);
+            for(unsigned int k=0; k<sz; k++) {
+                for(unsigned int l=0; l<sz; l++) {
+                    const size_t index1 = i*sz3 + j*sz2 + k*sz + l;
+                    const size_t index2 = i*sz3 + k*sz2 + l*sz + j;
+                    G[i][j] += P[k][l] * (TE[index1] - 0.5 * TE[index2]);
+                }
+            }
         }
-      }
     }
-  }
 
     /* construct Fock Matrix and orthogonalize it */
     F = matsum(H,G);
diff --git a/src/hf.h b/src/hf.h
index 02e54ba..c0b1d77 100644
--- a/src/hf.h
+++ b/src/hf.h
@@ -28,6 +28,7 @@
 #include <array>
 #include <string>
 #include <sstream>
+#include <array>
 
 #include "config.h"
 #include "molecule.h"
diff --git a/src/repulsion.cpp b/src/repulsion.cpp
index add3866..79b4467 100644
--- a/src/repulsion.cpp
+++ b/src/repulsion.cpp
@@ -76,11 +76,17 @@ double coulomb_repulsion(const Vec3 &a, const double &norma, const int la, const
     std::vector<double> by = B_array(ma, mb, mc, md, p.gety(), a.gety(), b.gety(), q.gety(), c.gety(), d.gety(), gamma1, gamma2, delta);
     std::vector<double> bz = B_array(na, nb, nc, nd, p.getz(), a.getz(), b.getz(), q.getz(), c.getz(), d.getz(), gamma1, gamma2, delta);
 
+    // pre-calculate all Fgamma values
+    std::vector<double> fg(la+lb+lc+ld+ma+mb+mc+md+na+nb+nc+nd+1);
+    for (unsigned int i=0; i<fg.size(); ++i) {
+        fg[i] = Fgamma(i,0.25*rpq2/delta);
+    }
+
     double sum = 0.0;
     for(int i=0; i<=(la+lb+lc+ld); i++) {
         for(int j=0; j<=(ma+mb+mc+md); j++) {
             for(int k=0; k<=(na+nb+nc+nd); k++) {
-                sum += bx[i]*by[j]*bz[k]*Fgamma(i+j+k,0.25*rpq2/delta);
+                sum += bx[i]*by[j]*bz[k]*fg[i+j+k];
             }
         }
     }
diff --git a/src/test/molecules/benzene.in b/src/test/molecules/benzene.in
index ca454ee..9c69b76 100644
--- a/src/test/molecules/benzene.in
+++ b/src/test/molecules/benzene.in
@@ -1,4 +1,4 @@
-basis = sto-6g
+basis = sto-3g
 charge = 0
 atoms = 12
 units = angstrom
diff --git a/src/test/test_molecule.cpp b/src/test/test_molecule.cpp
index 6f03d35..c8beb0d 100644
--- a/src/test/test_molecule.cpp
+++ b/src/test/test_molecule.cpp
@@ -75,5 +75,5 @@ void TestMolecule::testBenzene() {
     hf.run();
 
     double energy = hf.get_energy();
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(-230.13033852795101, energy, 1e-4);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(-227.890742770645, energy, 1e-4);
 }
diff --git a/src/version.h b/src/version.h
index 70fbdc0..597a32a 100644
--- a/src/version.h
+++ b/src/version.h
@@ -26,7 +26,7 @@
 #include <sstream>
 #include <string>
 
-#define PACKAGE_VERSION "1.5.0"
+#define PACKAGE_VERSION "1.5.1"
 
 std::string version();