@@ -1845,7 +1845,7 @@ def func_hessian_and_cholesky_factor_direct(
18451845
18461846 if qd .static (static_rigid_sim_config .backend == gs .cpu or static_rigid_sim_config .sparse_solve ):
18471847 # CPU
1848- qd .loop_config (serialize = static_rigid_sim_config .para_level < gs .PARA_LEVEL .ALL , block_dim = 32 )
1848+ qd .loop_config (serialize = static_rigid_sim_config .para_level < gs .PARA_LEVEL .ALL , block_dim = qd . static ( 64 if static_rigid_sim_config . backend == gs . amdgpu else 32 ) )
18491849 for i_b in range (_B ):
18501850 func_hessian_and_cholesky_factor_direct_batch (
18511851 i_b ,
@@ -1861,7 +1861,7 @@ def func_hessian_and_cholesky_factor_direct(
18611861 if qd .static (static_rigid_sim_config .enable_tiled_cholesky_hessian ):
18621862 func_cholesky_factor_direct_tiled (constraint_state , rigid_global_info , static_rigid_sim_config )
18631863 else :
1864- qd .loop_config (serialize = static_rigid_sim_config .para_level < gs .PARA_LEVEL .ALL , block_dim = 32 )
1864+ qd .loop_config (serialize = static_rigid_sim_config .para_level < gs .PARA_LEVEL .ALL , block_dim = qd . static ( 64 if static_rigid_sim_config . backend == gs . amdgpu else 32 ) )
18651865 for i_b in range (_B ):
18661866 func_cholesky_factor_direct_batch (i_b , constraint_state , rigid_global_info )
18671867
@@ -3246,7 +3246,7 @@ def func_update_gradient_tiled(
32463246 )
32473247
32483248 if qd .static (static_rigid_sim_config .solver_type == gs .constraint_solver .CG ):
3249- qd .loop_config (serialize = static_rigid_sim_config .para_level < gs .PARA_LEVEL .ALL , block_dim = 32 )
3249+ qd .loop_config (serialize = static_rigid_sim_config .para_level < gs .PARA_LEVEL .ALL , block_dim = qd . static ( 64 if static_rigid_sim_config . backend == gs . amdgpu else 32 ) )
32503250 for i_b in range (_B ):
32513251 func_solve_mass_batch (
32523252 i_b ,
@@ -3289,7 +3289,7 @@ def func_update_gradient(
32893289 not static_rigid_sim_config .enable_tiled_cholesky_hessian or static_rigid_sim_config .backend == gs .cpu
32903290 ):
32913291 # CPU
3292- qd .loop_config (serialize = static_rigid_sim_config .para_level < gs .PARA_LEVEL .ALL , block_dim = 32 )
3292+ qd .loop_config (serialize = static_rigid_sim_config .para_level < gs .PARA_LEVEL .ALL , block_dim = qd . static ( 64 if static_rigid_sim_config . backend == gs . amdgpu else 32 ) )
32933293 for i_b in range (_B ):
32943294 func_update_gradient_batch (
32953295 i_b ,
0 commit comments