From a95801ee08fbd30697f76838e0f87a4bd6c20069 Mon Sep 17 00:00:00 2001
From: Vinayak Tiwari <84216407+VinayakTiwari1103@users.noreply.github.com>
Date: Wed, 14 May 2025 00:26:29 +0530
Subject: [PATCH] FIX: Vectorized approach using NumPy searchsorted

The original code filters the calendar `cal` once for each date in `rebal_dates`, which is slow for large lists: processing takes much longer than it does for small lists. The updated code uses a vectorized search (via DatetimeIndex.searchsorted) to find all the appropriate indices in one go. This avoids explicit Python loops and repeated filtering, significantly improving performance while producing the same result for large datasets.
FIX: https://github.com/goldmansachs/gs-quant/issues/317
---
 gs_quant/timeseries/backtesting.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gs_quant/timeseries/backtesting.py b/gs_quant/timeseries/backtesting.py
index ae87755e..1145db27 100644
--- a/gs_quant/timeseries/backtesting.py
+++ b/gs_quant/timeseries/backtesting.py
@@ -13,7 +13,7 @@
 specific language governing permissions and limitations
 under the License.
 """
-
+import numpy as np
 from functools import partial
 
 from dateutil.relativedelta import relativedelta as rdelta
@@ -80,7 +80,10 @@ def backtest_basket(
             rebal_dates = [cal[0] + i * rdelta(months=1) for i in range(num_rebals + 1)]
 
         # Convert the hypothetical weekly/monthly rebalance dates to actual calendar days
-        rebal_dates = [min(cal[cal >= date]) for date in rebal_dates if date < max(cal)]
+        idxs = cal.searchsorted(rebal_dates)        # find insertion indices for all dates at once
+        idxs = idxs[idxs < len(cal)]               # filter out indices beyond last date
+        rebal_dates = cal[idxs]                     # map to actual calendar dates
+
 
     # Create Units dataframe
     units = pd.DataFrame(index=cal, columns=series.columns)