NVIDIA · naoyam · Feb 4, 2026 · Jan 31, 2026 · Feb 3, 2026 · Feb 3, 2026
diff --git a/csrc/id_model/indexing.cpp b/csrc/id_model/indexing.cpp
@@ -34,6 +34,8 @@
 namespace nvfuser {
 
 TensorIndexer::TensorIndexer(IdModel& id_model) : id_model_(id_model) {
+  NVF_ERROR(isSupported(id_model.fusion()));
+
   buildLoopIndexMap();
 
   if (isDebugDumpEnabled(DebugDumpOption::IndexingVerbose)) {

diff --git a/csrc/scheduler/expr_eval_sched.cpp b/csrc/scheduler/expr_eval_sched.cpp
@@ -64,6 +64,7 @@ bool ExprEvalScheduler::canScheduleCompileTime(Fusion* fusion) {
   // TODO: remove IndexPutAccumulateOp
   if (exprs.front()
           ->isOneOf<
+              GatherOp,
               ScatterOp,
               SdpaFwdOp,
               SdpaBwdOp,

diff --git a/csrc/scheduler/registry.cpp b/csrc/scheduler/registry.cpp
@@ -64,6 +64,16 @@ bool checkCanSchedule(Fusion* fusion, SchedulerType scheduler_type) {
     return false;
   }
 
+  // Support of non-exact gather was dropped when the legacy indexer was
+  // deprecated
+  if (std::ranges::any_of(
+          ir_utils::getOpsOfType<GatherOp>(fusion),
+          [](GatherOp* gather) { return !gather->exactSizes(); })) {
+    scheduler_debug_utils::canScheduleRejectReason(
+        scheduler_type, "Non-exact gather ops");
+    return false;
+  }
+
   // Fusions with `MatmulOp, LinearOp, MmaOp` can only be accepted by Matmul
   // scheduler.
   if (scheduler_type != SchedulerType::Matmul &&

diff --git a/tests/cpp/test_gather.cpp b/tests/cpp/test_gather.cpp
@@ -582,7 +582,7 @@ TEST_F(GatherTest, TakeAlongAxisIntermediateTensorReduction1) {
 
   validateSegmentation(
       executor_cache.getMostRecentKernelRuntime(),
-      {SchedulerType::Reduction, SchedulerType::PointWise});
+      {SchedulerType::Reduction, SchedulerType::ExprEval});
 
   testValidate(&fusion, outputs, {t0, t1}, __LINE__, __FILE__);
 }
@@ -1127,7 +1127,8 @@ TEST_F(GatherTest, TakeAlongAxisCrossEntropyLoss) {
 }
 
 // Test grouped reduction on IterType::GatherScatter
-TEST_F(GatherTest, GatherIterGoupedReduction) {
+// Disabled: codegen support for non-exact gather was dropped.
+TEST_F(GatherTest, DISABLED_GatherIterGoupedReduction) {
   const int max_dim_size = 128;
   auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
   auto options_i = at::TensorOptions().dtype(at::kLong).device(at::kCUDA, 0);
@@ -1211,7 +1212,8 @@ TEST_F(GatherTest, GatherIterGoupedReduction) {
       lparams);
 }
 
-TEST_F(GatherTest, SameTvUsedAsLookupAndIndex) {
+// Disabled: codegen support for non-exact gather was dropped.
+TEST_F(GatherTest, DISABLED_SameTvUsedAsLookupAndIndex) {
   auto fusion_ptr = std::make_unique<Fusion>();
   Fusion& fusion = *fusion_ptr.get();
   FusionGuard fg(&fusion);

diff --git a/tests/cpp/test_persistent_buffer.cpp b/tests/cpp/test_persistent_buffer.cpp
@@ -1941,7 +1941,9 @@ TEST_F(PersistentBufferTest, BufferGatherLookupTv) {
   auto tv2 = sum(tv1, {1});
   auto tv3 = broadcast(tv2, {false, true});
   auto tv4 = broadcast(index_tv, {false, true});
-  auto tv5 = gather(tv0, 1, tv4);
+  // Use takeAlongAxis rather than gather as codegen does not support
+  // the latter
+  auto tv5 = takeAlongAxis(tv0, tv4, 1);
   auto tv6 = maybeCastOp(DataType::BFloat16, tv5);
   auto tv7 = add(tv3, tv6);
   auto tv8 = add(tv1, tv7);

diff --git a/tests/cpp/test_reduction.cpp b/tests/cpp/test_reduction.cpp
@@ -2563,7 +2563,7 @@ TEST_F(ReductionTest, CrossEntropyGatherPattern) {
   fusion.addInput(labels);
 
   auto tv2 = broadcast(labels, {false, true});
-  auto tv3 = gather(log_probs, 1, tv2);
+  auto tv3 = takeAlongAxis(log_probs, tv2, 1);
   auto tv4 = squeeze(tv3, std::vector<bool>({false, true}));
 
   fusion.addOutput(tv4);