File tree Expand file tree Collapse file tree
hindsight-api/hindsight_api/engine Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -136,7 +136,12 @@ async def initialize(self) -> None:
136136 # 3. Data transfer overhead to GPU outweighs compute benefit
137137 # 4. CPU inference is actually faster for this workload
138138 logger .info (f"Reranker: initializing local provider with model { self .model_name } " )
139- self ._model = CrossEncoder (self .model_name )
139+ # Disable lazy loading (meta tensors), which causes issues with newer transformers/accelerate.
140+ # Setting low_cpu_mem_usage=False and device_map=None ensures tensors are fully materialized.
141+ self ._model = CrossEncoder (
142+ self .model_name ,
143+ model_kwargs = {"low_cpu_mem_usage" : False , "device_map" : None },
144+ )
140145
141146 # Initialize shared executor (limited workers naturally limits concurrency)
142147 if LocalSTCrossEncoder ._executor is None :
You can’t perform that action at this time.
0 commit comments