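🐛 Describe the bug
The script below fails with `RuntimeError: The size of contiguity must equal to the number of non-broadcasting IterDomains`.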
from nvfuser import FusionDefinition, DataType
import torch
def nvfuser_fusion_id2(fd: FusionDefinition) -> None:
    """Record a fusion that divides one 2-D input by a broadcast 2-D input.

    Two double-precision, non-CPU inputs are declared on ``fd``, in this
    order: one with symbolic sizes ``[-1, 1]`` and one with ``[-1, -1]``.
    The first is passed through ``broadcast_in_dim`` with output shape
    ``[4, 4]``; the second is divided by that result and the quotient is
    registered as the fusion's only output.

    Args:
        fd: The fusion definition that the inputs, ops and output are
            recorded on.
    """
    # Input whose trailing extent is fixed to 1; both dimensions are
    # nevertheless flagged as contiguous.
    column = fd.define_tensor(
        symbolic_sizes=[-1, 1],
        contiguous=[True, True],
        dtype=DataType.Double,
        is_cpu=False,
    )
    # Input with two symbolic extents.
    matrix = fd.define_tensor(
        symbolic_sizes=[-1, -1],
        contiguous=[True, True],
        dtype=DataType.Double,
        is_cpu=False,
    )
    expanded = fd.ops.broadcast_in_dim(
        column,
        output_shape=[4, 4],
        broadcast_dims=[0, 1],
    )
    quotient = fd.ops.div(matrix, expanded)
    fd.add_output(quotient)
# Both inputs are all-ones double tensors on the CUDA device; only the
# shapes differ, so the shared options are spelled out once.
tensor_options = dict(dtype=torch.double, device='cuda')
a = torch.ones((4, 1), **tensor_options)
b = torch.ones((4, 4), **tensor_options)

# RuntimeError: The size of contiguity must equal to the number of non-broadcasting IterDomains
with FusionDefinition() as fd:
    nvfuser_fusion_id2(fd)

# NOTE(review): the original indentation was lost; the execute call is
# assumed to sit outside the `with` block -- confirm against the report.
fd.execute([a, b])
Versions
devel