@@ -526,11 +526,16 @@ bool TransposeDomainMap::hasAtLeastTwoValidGroups(Fusion* fusion) {
526526 return false ;
527527 }
528528
529- // For grouping caused by permutation, the corresponding loop domains should
530- // not be all mapped to each other. If they are, it means the two groups are
531- // due to broadcast. In this case, they are not considered as valid groups
532- // since the broadcast tensor has a smaller size and pointwise scheduler
533- // handles broadcast well through unrolling and caching at all levels.
529+ // For grouping caused by permutation, the corresponding allocation domains
530+ // should not all be mapped to each other. If they are, it means the two
531+ // groups are due to broadcast. In this case, they are not considered valid
532+ // groups since the broadcast tensor has a smaller size and pointwise
533+ // scheduler handles broadcast well through unrolling and caching at all
534+ // levels. For example, in TransposeTest.NoTransposeMaverick17B, two inputs
535+ // are tv0[i0, i1] and tv1[i2, b3] where i0/i2 and i1/b3 are mapped to each
536+ // other. However, tv0 and tv1 are in two different groups because of the
537+ // broadcast. In this case, we should use the pointwise scheduler instead of
538+ // the transpose scheduler.
534539 const auto & ref1_loop = ref1->getMaybeAllocationDomain ();
535540 const auto & ref2_loop = ref2->getMaybeAllocationDomain ();
536541 const auto & ca_map = domain_map.getComputeAtMap ();
0 commit comments