| 
 | 1 | +#!/usr/bin/env python  | 
 | 2 | + | 
 | 3 | +"""  | 
 | 4 | +OCCRI Performance Demonstration  | 
 | 5 | +
  | 
 | 6 | +This example demonstrates OCCRI's performance characteristics and shows  | 
 | 7 | +how to benchmark it against standard FFTDF. OCCRI provides significant  | 
 | 8 | +speedup while maintaining chemical accuracy.  | 
 | 9 | +
  | 
 | 10 | +Key topics covered:  | 
 | 11 | +- Timing OCCRI vs FFTDF calculations  | 
 | 12 | +- Performance scaling considerations  | 
 | 13 | +- When OCCRI provides the most benefit  | 
 | 14 | +- How to optimize OCCRI performance  | 
 | 15 | +"""  | 
 | 16 | + | 
 | 17 | +import time  | 
 | 18 | + | 
 | 19 | +import numpy  | 
 | 20 | + | 
 | 21 | +from pyscf.occri import OCCRI  | 
 | 22 | +from pyscf.pbc import df, gto, scf  | 
 | 23 | + | 
 | 24 | +# Set up a moderately sized system for performance comparison  | 
 | 25 | +cell = gto.Cell()  | 
 | 26 | +cell.atom = """  | 
 | 27 | +    C 0.000000 0.000000 1.780373  | 
 | 28 | +    C 0.890186 0.890186 2.670559  | 
 | 29 | +    C 0.000000 1.780373 0.000000  | 
 | 30 | +    C 0.890186 2.670559 0.890186  | 
 | 31 | +"""  | 
 | 32 | +cell.basis = "gth-cc-tzvp"  | 
 | 33 | +cell.pseudo = "gth-pbe"  | 
 | 34 | +cell.a = numpy.array(  | 
 | 35 | +    [  | 
 | 36 | +        [3.560745, 0.000000, 0.000000],  | 
 | 37 | +        [0.000000, 3.560745, 0.000000],  | 
 | 38 | +        [0.000000, 0.000000, 3.560745],  | 
 | 39 | +    ]  | 
 | 40 | +)  | 
 | 41 | +cell.mesh = [25] * 3  | 
 | 42 | +cell.verbose = 0  | 
 | 43 | +cell.build()  | 
 | 44 | +kmesh = [1,1,1]  | 
 | 45 | +kpts = cell.make_kpts(kmesh)  | 
 | 46 | + | 
 | 47 | +print("=== OCCRI Performance Comparison ===")  | 
 | 48 | +print(  | 
 | 49 | +    f"System: {' '.join(cell.atom_symbol(i) for i in range(cell.natm))} ({cell.natm} atoms, {cell.nao} AOs)"  | 
 | 50 | +)  | 
 | 51 | +print(f"Basis: {cell.basis}")  | 
 | 52 | +print(f"Mesh: {cell.mesh}")  | 
 | 53 | + | 
 | 54 | +# Example 1: Compare K matrix construction: FFTDF vs OCCRI  | 
 | 55 | +print("\n1. K matrix construction timing comparison")  | 
 | 56 | + | 
 | 57 | +# Set up common density matrix for fair comparison  | 
 | 58 | +print("   Setting up test density matrix...")  | 
 | 59 | +mf_ref = scf.KRHF(cell, kpts=kpts)  | 
 | 60 | +mf_ref.max_cycle = 1  # Store MO Coeff for comparison  | 
 | 61 | +mf_ref.kernel()  | 
 | 62 | +dm = mf_ref.make_rdm1(kpts=kpts)  | 
 | 63 | + | 
 | 64 | +# Time FFTDF K matrix construction only  | 
 | 65 | +print("   Timing FFTDF K matrix construction...")  | 
 | 66 | +mf_ref = scf.KRHF(cell, kpts=kpts)  | 
 | 67 | +mf_ref._is_mem_enough = lambda: False  # Turn off 'incore' for small demo  | 
 | 68 | +start_time = time.time()  | 
 | 69 | +_, vk_fftdf = mf_ref.get_jk(dm_kpts=dm, with_j=False, with_k=True, kpts=kpts)  | 
 | 70 | +fftdf_k_time = time.time() - start_time  | 
 | 71 | + | 
 | 72 | +# Time OCCRI K matrix construction only  | 
 | 73 | +print("   Timing OCCRI K matrix construction...")  | 
 | 74 | +mf_occri = scf.KRHF(cell, kpts=kpts)  | 
 | 75 | +mf_occri.with_df = OCCRI(mf_occri, disable_c=True, kmesh=kmesh)  | 
 | 76 | +mf_occri.with_df.scf_iter = 1  # Don't rebuild MOs for timing  | 
 | 77 | + | 
 | 78 | +start_time = time.time()  | 
 | 79 | +_, vk_occri = mf_occri.get_jk(dm=dm, with_j=False, with_k=True, kpts=kpts)  | 
 | 80 | +occri_k_time = time.time() - start_time  | 
 | 81 | + | 
 | 82 | +# Results  | 
 | 83 | +k_energy_fftdf = numpy.einsum("kij,kji", vk_fftdf, dm) * 0.5  | 
 | 84 | +k_energy_occri = numpy.einsum("kij,kji", vk_occri, dm) * 0.5  | 
 | 85 | +energy_diff = abs(k_energy_fftdf - k_energy_occri)  | 
 | 86 | +k_speedup = fftdf_k_time / occri_k_time  | 
 | 87 | + | 
 | 88 | +print(f"   FFTDF K matrix:   {k_energy_fftdf:.8f} Ha ({fftdf_k_time:.3f}s)")  | 
 | 89 | +print(f"   OCCRI K matrix:   {k_energy_occri:.8f} Ha ({occri_k_time:.3f}s)")  | 
 | 90 | +print(f"   Energy difference: {energy_diff:.2e} Hartree")  | 
 | 91 | +print(f"   K matrix speedup: {k_speedup:.2f}x")  | 
 | 92 | + | 
 | 93 | +# Example 2: K matrix timing for multiple calls (realistic usage)  | 
 | 94 | +print("\n2. Multiple K matrix evaluations (typical in SCF)")  | 
 | 95 | + | 
 | 96 | +print("   Testing with 7 K matrix evaluations...")  | 
 | 97 | +n_calls = 7  | 
 | 98 | + | 
 | 99 | +# Time multiple FFTDF K matrix calls  | 
 | 100 | +print("   Timing FFTDF...")  | 
 | 101 | +start_time = time.time()  | 
 | 102 | +for i in range(n_calls):  | 
 | 103 | +    _, vk_fftdf = mf_ref.get_jk(dm_kpts=dm, with_j=False, with_k=True, kpts=kpts)  | 
 | 104 | +fftdf_multi_time = time.time() - start_time  | 
 | 105 | + | 
 | 106 | +# Time multiple OCCRI K matrix calls  | 
 | 107 | +print("   Timing OCCRI...")  | 
 | 108 | +start_time = time.time()  | 
 | 109 | +for i in range(n_calls):  | 
 | 110 | +    _, vk_occri = mf_occri.get_jk(dm=dm, with_j=False, with_k=True, kpts=kpts)  | 
 | 111 | +occri_multi_time = time.time() - start_time  | 
 | 112 | + | 
 | 113 | +multi_speedup = fftdf_multi_time / occri_multi_time  | 
 | 114 | + | 
 | 115 | +print(  | 
 | 116 | +    f"   FFTDF: {n_calls} calls in {fftdf_multi_time:.3f}s ({fftdf_multi_time/n_calls:.3f}s per call)"  | 
 | 117 | +)  | 
 | 118 | +print(  | 
 | 119 | +    f"   OCCRI: {n_calls} calls in {occri_multi_time:.3f}s ({occri_multi_time/n_calls:.3f}s per call)"  | 
 | 120 | +)  | 
 | 121 | +print(f"   Average K speedup: {multi_speedup:.2f}x")  | 
 | 122 | + | 
 | 123 | + | 
 | 124 | +print("\n=== Performance Summary ===")  | 
 | 125 | +print(f"• K matrix construction speedup: {k_speedup:.1f}x (single call)")  | 
 | 126 | +print(f"• K matrix construction speedup: {multi_speedup:.1f}x (multiple calls)")  | 
 | 127 | +print(f"• Exchange energy accuracy: ~{energy_diff:.0e} Hartree")  | 
 | 128 | + | 
 | 129 | +print("\n=== Optimization Notes ===")  | 
 | 130 | +try:  | 
 | 131 | +    from pyscf.occri import _OCCRI_C_AVAILABLE  | 
 | 132 | + | 
 | 133 | +    if _OCCRI_C_AVAILABLE:  | 
 | 134 | +        print("✓ Using optimized C extension with FFTW and OpenMP")  | 
 | 135 | +        print("  - Compiled C code provides ~5-10x base speedup")  | 
 | 136 | +        print("  - FFTW optimized FFTs for best performance")  | 
 | 137 | +        print("  - OpenMP parallelization scales with CPU cores")  | 
 | 138 | +    else:  | 
 | 139 | +        print("⚠ Using Python fallback implementation")  | 
 | 140 | +        print("  - Install FFTW, BLAS, and OpenMP for optimal performance")  | 
 | 141 | +        print("  - C extension provides significant additional speedup")  | 
 | 142 | +except ImportError:  | 
 | 143 | +    print("⚠ OCCRI module information not available")  | 
 | 144 | + | 
 | 145 | +print("\n=== Benchmarking Tips ===")  | 
 | 146 | +print("To properly benchmark OCCRI:")  | 
 | 147 | +print("• Run multiple trials and average timings for statistical significance")  | 
 | 148 | +print("• Use representative system sizes (OCCRI benefits scale with system size)")  | 
 | 149 | +print("• Test both C extension and Python implementations")  | 
 | 150 | +print("• Consider memory usage in addition to timing")  | 
 | 151 | +print("• Verify energy accuracy remains within acceptable thresholds")  | 
 | 152 | + | 
 | 153 | +print("\n=== When to Use OCCRI ===")  | 
 | 154 | +print("OCCRI provides most benefit for:")  | 
 | 155 | +print("• Large basis sets: cc-pVTZ, aug-cc-pVDZ, gth-cc-tzvp")  | 
 | 156 | +print("• Systems where N_AO >> N_occ (wide band gap insulators)")  | 
 | 157 | +print("• Hybrid DFT calculations requiring exact exchange")  | 
 | 158 | +print("• k-point calculations (see 02-kpoint_calculations.py)")  | 
 | 159 | +print("• Production calculations where FFTDF becomes a bottleneck")  | 
 | 160 | +print("")  | 
 | 161 | +print("OCCRI may be slower for:")  | 
 | 162 | +print("• Small basis sets: STO-3G, 6-31G, gth-szv")  | 
 | 163 | +print("• Metallic systems where N_occ ≈ N_AO/2")  | 
 | 164 | +print("• Quick test calculations with minimal basis sets")  | 
 | 165 | + | 
 | 166 | +print("\n=== Critical Performance Scaling Insight ===")  | 
 | 167 | +print("K matrix construction complexity (the bottleneck OCCRI optimizes):")  | 
 | 168 | +print("• FFTDF K matrix: O(N_k² × N_AO² × N_grid × log(N_grid))")  | 
 | 169 | +print("• OCCRI K matrix: O(N_k² × N_occ² × N_grid × log(N_grid))")  | 
 | 170 | +print(  | 
 | 171 | +    f"• Theoretical K matrix speedup: N_AO²/N_occ² = {cell.nao**2/(cell.nelectron//2)**2:.1f}x"  | 
 | 172 | +)  | 
 | 173 | +print("")  | 
 | 174 | + | 
 | 175 | +print(f"\nCurrent system ({cell.basis}):")  | 
 | 176 | +print(f"• {cell.nao} AOs, {cell.nelectron//2} occupied orbitals")  | 
 | 177 | +print(f"• Theoretical K speedup limit: {cell.nao**2/(cell.nelectron//2)**2:.1f}x")  | 
 | 178 | +print("• Practical K speedup: typically achieves 10-30% of limit")  | 
 | 179 | + | 
 | 180 | +print("\n=== Additional Scaling Factors ===")  | 
 | 181 | +print("• k-point calculations: O(N_k²) scaling favors OCCRI even more")  | 
 | 182 | +print("• C extension: provides additional ~5-10x speedup")  | 
 | 183 | +print("• Memory: OCCRI scales as O(N_occ) vs FFTDF O(N_AO)")  | 
 | 184 | + | 
 | 185 | +print(  | 
 | 186 | +    "\nExample completed! Try different basis sets (gth-szv, gth-dzvp, gth-cc-tzvp) to see scaling."  | 
 | 187 | +)  | 
0 commit comments