From a95801ee08fbd30697f76838e0f87a4bd6c20069 Mon Sep 17 00:00:00 2001 From: Vinayak Tiwari <84216407+VinayakTiwari1103@users.noreply.github.com> Date: Wed, 14 May 2025 00:26:29 +0530 Subject: [PATCH] FIX: Vectorized approach using NumPy searchsorted The original code filters the calendar cal once for each date in rebal_dates, which is slow for large lists: processing takes much more time than it does for small lists. The updated code uses a vectorized search (via DatetimeIndex.searchsorted) to find all the appropriate indices in one go. This avoids explicit Python loops and repeated filtering, significantly improving performance on large datasets while producing the same result. FIX: https://github.com/goldmansachs/gs-quant/issues/317 --- gs_quant/timeseries/backtesting.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gs_quant/timeseries/backtesting.py b/gs_quant/timeseries/backtesting.py index ae87755e..1145db27 100644 --- a/gs_quant/timeseries/backtesting.py +++ b/gs_quant/timeseries/backtesting.py @@ -13,7 +13,7 @@ specific language governing permissions and limitations under the License. """ - +import numpy as np from functools import partial from dateutil.relativedelta import relativedelta as rdelta @@ -80,7 +80,10 @@ def backtest_basket( rebal_dates = [cal[0] + i * rdelta(months=1) for i in range(num_rebals + 1)] # Convert the hypothetical weekly/monthly rebalance dates to actual calendar days - rebal_dates = [min(cal[cal >= date]) for date in rebal_dates if date < max(cal)] + idxs = cal.searchsorted(rebal_dates) # find insertion indices for all dates at once + idxs = idxs[idxs < len(cal)] # filter out indices beyond last date + rebal_dates = cal[idxs] # map to actual calendar dates + # Create Units dataframe units = pd.DataFrame(index=cal, columns=series.columns)