
Commit 249d95b

mxtensor: make data argument first and rename to qdata (#2804)
Update [ghstack-poisoned]
1 parent af2cf1e commit 249d95b
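
In short: the MXTensor constructor now takes the quantized payload as its first positional argument (ahead of the scale), and the attribute holding that payload is renamed from `_data` to `qdata`. A minimal sketch of the new accessor, assuming the `torchao.prototype.mx_formats.mx_tensor` import path and that `MXTensor.to_mx` runs on CPU (neither is confirmed by this page):

import torch
from torchao.prototype.mx_formats.mx_tensor import MXTensor

x = torch.randn(32, 64, dtype=torch.bfloat16)
x_mx = MXTensor.to_mx(x, torch.float8_e4m3fn, 32)  # elem_dtype, block_size=32

# The quantized payload is now exposed as `qdata` (previously `_data`);
# the e8m0 scale is still `_scale_e8m0`, one scale per 32-element block.
assert x_mx.qdata.shape == (32, 64)
assert x_mx._scale_e8m0.shape == (32, 2)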

File tree

6 files changed: 56 additions (+), 58 deletions (−)


test/prototype/mx_formats/test_inference_workflow.py

Lines changed: 1 addition & 1 deletion

@@ -55,7 +55,7 @@ def run_around_tests():
     "ROCm float4 gemm require gfx950"
 )  # TODO(future): deploy gfx950 in ROCM CI
 @pytest.mark.skipif(not is_sm_at_least_100(), reason="CUDA capability >= 10.0 required")
-def test_inference_workflow(elem_dtype, bias: bool, compile: bool):
+def test_inference_workflow_mx(elem_dtype, bias: bool, compile: bool):
     """
     Smoke test for inference compile
     """

test/prototype/mx_formats/test_mx_mm.py

Lines changed: 2 additions & 2 deletions

@@ -38,8 +38,8 @@ def run_matrix_test(M: int, K: int, N: int, format) -> float:
     a_mx = MXTensor.to_mx(a, fmt, 32)
     b_mx = MXTensor.to_mx(b, fmt, 32)
 
-    a_data = a_mx._data
-    b_data = b_mx._data
+    a_data = a_mx.qdata
+    b_data = b_mx.qdata
     assert b_data.is_contiguous()
     b_data = b_data.transpose(-1, -2)

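The matmul test now reads the packed payloads through qdata before handing them to the low-level gemm. A hedged sketch of the same access pattern (shapes and the fp8 format are illustrative; import path assumed):

import torch
from torchao.prototype.mx_formats.mx_tensor import MXTensor

a = torch.randn(128, 256, dtype=torch.bfloat16)
b = torch.randn(256, 128, dtype=torch.bfloat16)
a_mx = MXTensor.to_mx(a, torch.float8_e4m3fn, 32)
b_mx = MXTensor.to_mx(b, torch.float8_e4m3fn, 32)

# As in run_matrix_test above: pull the payloads via qdata, then put b
# into the column-major layout the downstream mx gemm expects.
a_data, b_data = a_mx.qdata, b_mx.qdata
assert b_data.is_contiguous()
b_data = b_data.transpose(-1, -2)
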
test/prototype/mx_formats/test_mx_tensor.py

Lines changed: 16 additions & 16 deletions

@@ -73,9 +73,9 @@ def assert_sqnr_gt_threshold(orig, new, threshold):
     # verify that if data.shape is (M, K) then scale.shape is (M, K // block_size)
     prev_dims, K = data_hp.shape[:-1], data_hp.shape[-1]
     if elem_dtype is torch.float4_e2m1fn_x2:
-        assert data_mx._data.shape == (*prev_dims, K // 2)
+        assert data_mx.qdata.shape == (*prev_dims, K // 2)
     else:
-        assert data_mx._data.shape == (*prev_dims, K)
+        assert data_mx.qdata.shape == (*prev_dims, K)
     assert data_mx._scale_e8m0.shape == (*prev_dims, K // block_size)
 
 
@@ -148,8 +148,8 @@ def test_to_mx_rceil():
         data_hp, torch.float8_e4m3fn, 32, ScaleCalculationMode.RCEIL
     )
     torch.testing.assert_close(data_mx._scale_e8m0, ground_truth_scale)
-    assert torch.isnan(data_mx._data[0])
-    assert torch.all(data_mx._data[1:] == 0)
+    assert torch.isnan(data_mx.qdata[0])
+    assert torch.all(data_mx.qdata[1:] == 0)
     # fp32 denorm
     # fmt: off
     data_hp = torch.tensor(
@@ -170,7 +170,7 @@ def test_to_mx_rceil():
         data_hp, torch.float8_e4m3fn, 32, ScaleCalculationMode.RCEIL
     )
     torch.testing.assert_close(data_mx._scale_e8m0, ground_truth_scale)
-    torch.testing.assert_close(data_mx._data, ground_truth_fp8)
+    torch.testing.assert_close(data_mx.qdata, ground_truth_fp8)
     # bf16 denorm
     # fmt: off
     data_hp = torch.tensor(
@@ -191,7 +191,7 @@ def test_to_mx_rceil():
         data_hp, torch.float8_e4m3fn, 32, ScaleCalculationMode.RCEIL
     )
     torch.testing.assert_close(data_mx._scale_e8m0, ground_truth_scale)
-    torch.testing.assert_close(data_mx._data, ground_truth_fp8)
+    torch.testing.assert_close(data_mx.qdata, ground_truth_fp8)
     # fp32 some denorm
     # fmt: off
     data_hp = torch.tensor(
@@ -222,7 +222,7 @@ def test_to_mx_rceil():
         data_hp, torch.float8_e4m3fn, 32, ScaleCalculationMode.RCEIL
     )
     torch.testing.assert_close(data_mx._scale_e8m0, ground_truth_scale)
-    torch.testing.assert_close(data_mx._data, ground_truth_fp8)
+    torch.testing.assert_close(data_mx.qdata, ground_truth_fp8)
     # bf16 some denorm
     # fmt: off
     data_hp = torch.tensor(
@@ -253,7 +253,7 @@ def test_to_mx_rceil():
         data_hp, torch.float8_e4m3fn, 32, ScaleCalculationMode.RCEIL
     )
     torch.testing.assert_close(data_mx._scale_e8m0, ground_truth_scale)
-    torch.testing.assert_close(data_mx._data, ground_truth_fp8)
+    torch.testing.assert_close(data_mx.qdata, ground_truth_fp8)
     # zero
     data_hp = torch.tensor([0] * 32, dtype=torch.uint32).view(torch.float32)
     ground_truth_scale = torch.tensor([0], dtype=torch.uint8).view(torch.float8_e8m0fnu)
@@ -264,7 +264,7 @@ def test_to_mx_rceil():
         data_hp, torch.float8_e4m3fn, 32, ScaleCalculationMode.RCEIL
     )
     torch.testing.assert_close(data_mx._scale_e8m0, ground_truth_scale)
-    torch.testing.assert_close(data_mx._data, ground_truth_fp8)
+    torch.testing.assert_close(data_mx.qdata, ground_truth_fp8)
     # fp32 normal
     # fmt: off
     data_hp = torch.tensor(
@@ -295,7 +295,7 @@ def test_to_mx_rceil():
         data_hp, torch.float8_e4m3fn, 32, ScaleCalculationMode.RCEIL
     )
     torch.testing.assert_close(data_mx._scale_e8m0, ground_truth_scale)
-    torch.testing.assert_close(data_mx._data, ground_truth_fp8)
+    torch.testing.assert_close(data_mx.qdata, ground_truth_fp8)
     # bf16 normal
     # fmt: off
     data_hp = torch.tensor(
@@ -326,7 +326,7 @@ def test_to_mx_rceil():
         data_hp, torch.float8_e4m3fn, 32, ScaleCalculationMode.RCEIL
     )
     torch.testing.assert_close(data_mx._scale_e8m0, ground_truth_scale)
-    torch.testing.assert_close(data_mx._data, ground_truth_fp8)
+    torch.testing.assert_close(data_mx.qdata, ground_truth_fp8)
 
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -382,8 +382,8 @@ def test_exponent_nan_out(elem_dtype, pack_fp6):
     block_size = 4
     use_fp4_custom_triton_dequant_kernel = False
     tensor_mx = MXTensor(
-        scale_e8m0,
         data_bits,
+        scale_e8m0,
         elem_dtype,
         block_size,
         torch.float,
@@ -473,7 +473,7 @@ def test_fp6_packing(elem_dtype, pack_fp6):
     else:
         expected_packed_shape = x.shape
 
-    assert x_mx._data.shape == expected_packed_shape
+    assert x_mx.qdata.shape == expected_packed_shape
 
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@@ -505,14 +505,14 @@ def test_to_mx_from_mx_compile_numerics(elem_dtype, hp_dtype, all_zeros):
         atol=0,
         rtol=0,
     )
-    torch.testing.assert_close(x_mx._data, x_mx_c._data, atol=0, rtol=0)
+    torch.testing.assert_close(x_mx.qdata, x_mx_c.qdata, atol=0, rtol=0)
 
     to_dtype_c = torch.compile(to_dtype, fullgraph=True)
 
     use_fp4_custom_triton_dequant_kernel = False
     pack_fp6 = False
     x_mx_dq = to_dtype(
-        x_mx._data,
+        x_mx.qdata,
         x_mx._scale_e8m0,
         x_mx._elem_dtype,
         x_mx._block_size,
@@ -521,7 +521,7 @@ def test_to_mx_from_mx_compile_numerics(elem_dtype, hp_dtype, all_zeros):
         pack_fp6,
    )
     x_mx_c_dq = to_dtype_c(
-        x_mx_c._data,
+        x_mx_c.qdata,
         x_mx_c._scale_e8m0,
         x_mx_c._elem_dtype,
         x_mx_c._block_size,
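
One detail the shape checks at the top of this file encode: fp4 packs two elements per byte, so qdata's last dimension is half the logical width while the scale still has one entry per block. A sketch (CUDA device and fp4 support assumed):

import torch
from torchao.prototype.mx_formats.mx_tensor import MXTensor  # path assumed

x = torch.randn(32, 64, device="cuda", dtype=torch.bfloat16)
x_mx = MXTensor.to_mx(x, torch.float4_e2m1fn_x2, 32)

# Packed payload: K // 2 bytes per row; scale: K // block_size per row.
assert x_mx.qdata.shape == (32, 32)
assert x_mx._scale_e8m0.shape == (32, 2)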

torchao/prototype/mx_formats/mx_linear.py

Lines changed: 2 additions & 2 deletions

@@ -60,8 +60,8 @@ def _to_mxfp8_dim1_kernel_wrapper(
         a_data_local = a_data.to_local()
         a_scale_local = a_scale.to_local()
         inner = MXTensor(
-            a_scale_local,
             a_data_local.t(),
+            a_scale_local,
             elem_dtype,
             block_size,
             hp_dtype,
@@ -79,8 +79,8 @@ def _to_mxfp8_dim1_kernel_wrapper(
         )
     else:
         mx_tensor = MXTensor(
-            a_scale,
             a_data.t(),
+            a_scale,
             elem_dtype,
             block_size,
             hp_dtype,
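
Both call sites here flip the first two constructor arguments: payload first, scale second. A sketch of rebuilding an MXTensor from an existing one under the new order; the three trailing arguments mirror private attributes used elsewhere in mx_formats and are an assumption about the full signature, not confirmed by this diff:

import torch
from torchao.prototype.mx_formats.mx_tensor import MXTensor  # path assumed

x_mx = MXTensor.to_mx(
    torch.randn(32, 64, dtype=torch.bfloat16), torch.float8_e4m3fn, 32
)
y_mx = MXTensor(
    x_mx.qdata,        # quantized payload now comes first (was second)
    x_mx._scale_e8m0,  # scale now comes second (was first)
    x_mx._elem_dtype,
    x_mx._block_size,
    x_mx._orig_dtype,
    x_mx._use_fp4_custom_triton_dequant_kernel,  # assumed attribute
    x_mx._gemm_kernel_choice,                    # assumed attribute
    x_mx._pack_fp6,                              # assumed attribute
)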

torchao/prototype/mx_formats/mx_ops.py

Lines changed: 12 additions & 12 deletions

@@ -91,8 +91,8 @@ def _addmm_mx_dispatch(
     if gemm_choice in (MXGemmKernelChoice.CUBLAS, MXGemmKernelChoice.CUTLASS):
         # real MX gemm backed by torchao's CUTLASS kernels
         M, K, N = a.shape[0], a.shape[1], b.shape[1]
-        assert a._data.is_contiguous()
-        assert b._data.t().is_contiguous()
+        assert a.qdata.is_contiguous()
+        assert b.qdata.t().is_contiguous()
         assert a._block_size == 32, f"Invalid block size {a._block_size}"
         assert b._block_size == 32, f"Invalid block size {b._block_size}"
 
@@ -108,8 +108,8 @@ def _addmm_mx_dispatch(
         )
 
         res = torch._scaled_mm(
-            a._data,
-            b._data,
+            a.qdata,
+            b.qdata,
             a_scale_block.view(torch.float8_e8m0fnu),
             b_scale_block.view(torch.float8_e8m0fnu),
             bias=bias,
@@ -121,7 +121,7 @@ def _addmm_mx_dispatch(
         assert gemm_choice is MXGemmKernelChoice.CUTLASS, "unsupported"
         # FP4 operations
         res = torchao.ops.mx_fp4_bf16(
-            a._data, b._data, a_scale_block, b_scale_block
+            a.qdata, b.qdata, a_scale_block, b_scale_block
         )
         # TODO add optional bias to kernel
         if bias is not None:
@@ -171,8 +171,8 @@ def mx_t(func, types, args, kwargs):
     # For now, only transpose(input, 0, 1) is supported.
     old = args[0]
     new = MXTensor(
+        old.qdata.t(),
         old._scale_e8m0,
-        old._data.t(),
         old._elem_dtype,
         old._block_size,
         old._orig_dtype,
@@ -205,7 +205,7 @@ def unwrap(x):
 
 @implements([aten.view.default])
 def mx_view_op(func, types, args, kwargs):
-    data = args[0]._data
+    data = args[0].qdata
     new_size = args[1]
     if args[0]._elem_dtype == torch.float4_e2m1fn_x2:
         # special case fp4 as we pack two elements per byte
@@ -215,8 +215,8 @@ def mx_view_op(func, types, args, kwargs):
         new_size = tensor_size_hpx3_to_fp6x4(new_size, data.is_contiguous())
     new_data = func(data, new_size, *args[2:], **kwargs)
     return MXTensor(
-        args[0]._scale_e8m0,
         new_data,
+        args[0]._scale_e8m0,
         args[0]._elem_dtype,
         args[0]._block_size,
         args[0]._orig_dtype,
@@ -241,7 +241,7 @@ def mx_slice(func, types, args, kwargs):
     if dim == 0:
         # Slicing along the first dimension (rows) TODO assuming that dim 1 is reduciton dim for now
         sliced_scale = aten.slice.Tensor(scale_shaped, dim, start, end, step)
-        sliced_data = aten.slice.Tensor(x._data, dim, start, end, step).unsqueeze(-1)
+        sliced_data = aten.slice.Tensor(x.qdata, dim, start, end, step).unsqueeze(-1)
     elif dim == 1:
         # Slicing along reduciton dim
         if start is not None:
@@ -256,7 +256,7 @@ def mx_slice(func, types, args, kwargs):
                 f"End index {end} must be a multiple of block_size {x._block_size}"
             )
 
-        sliced_data = aten.slice.Tensor(x._data, dim, start, end, step)
+        sliced_data = aten.slice.Tensor(x.qdata, dim, start, end, step)
 
         # Calculate which scale elements to keep
        start_block = 0 if start is None else start // x._block_size
@@ -276,8 +276,8 @@ def mx_slice(func, types, args, kwargs):
         args,
         kwargs,
         MXTensor(
-            sliced_scale,
             sliced_data,
+            sliced_scale,
             x._elem_dtype,
             x._block_size,
             x._orig_dtype,
@@ -330,8 +330,8 @@ def autocast_to_copy(func, types, args, kwargs):
     # If dtype is specified, create a new MXTensor with the requested dtype
     if dtype is not None:
         res = MXTensor(
+            tensor.qdata,
             tensor._scale_e8m0,
-            tensor._data,
             tensor._elem_dtype,
             tensor._block_size,
             dtype,
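
Since mx_t now builds the transposed result from old.qdata.t(), a plain .t() on an MXTensor transposes the packed payload directly, with no dequantize/requantize round trip. A quick sketch (import path assumed):

import torch
from torchao.prototype.mx_formats.mx_tensor import MXTensor

x_mx = MXTensor.to_mx(
    torch.randn(32, 64, dtype=torch.bfloat16), torch.float8_e4m3fn, 32
)

# aten.t.default dispatches to mx_t above.
x_t = x_mx.t()
assert x_t.qdata.shape == (64, 32)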
