From 9a0d958a297097d146eac635d9322e476adcb6db Mon Sep 17 00:00:00 2001 From: Zonglin Peng Date: Fri, 31 Oct 2025 08:25:29 -0700 Subject: [PATCH] div.Tensor_mode, [Jarvis][Nightly] address error in jarvis-nightly-operators-test-aten-leaky-relu-out, div_tensor, jarvis-nightly-operators-test-aten-permute-copy-out, jarvis-nightly-operators-test-aten-flip-out, jarvis-nightly-operators-test-aten... Summary: ...-constant-pad-nd-out, jarvis-nightly-operators-test-aten-where-out, jarvis-nightly-operators-test-aten-clamp-out https://docs.google.com/spreadsheets/d/12DsKcvPcGgxnZ8shgn6j8PmoOQUfy5GgUg974g1iO18/edit?gid=0#gid=0 Differential Revision: D85957619 --- backends/cadence/utils/facto_util.py | 174 ++++++++++++++++++++------- 1 file changed, 131 insertions(+), 43 deletions(-) diff --git a/backends/cadence/utils/facto_util.py b/backends/cadence/utils/facto_util.py index e49cf412c19..3c1f3c10036 100644 --- a/backends/cadence/utils/facto_util.py +++ b/backends/cadence/utils/facto_util.py @@ -15,6 +15,7 @@ import torch from facto.inputgen.argtuple.gen import ArgumentTupleGenerator from facto.inputgen.specs.model import ConstraintProducer as cp +from facto.inputgen.utils.random_manager import seeded_random_manager as rm from facto.inputgen.variable.type import ScalarDtype from facto.specdb.db import SpecDictDB @@ -26,6 +27,33 @@ _shape_cache: dict[str, list[int]] = {} +def _positive_valid_dim_list(tensor: torch.Tensor, length: int) -> set[tuple[int, ...]]: + """ + Generate valid permutations using only positive dimension indices. + This is required for Cadence/Xtensa kernels that don't support negative indexing. + + Args: + tensor: Input tensor to generate permutations for + length: Number of dimensions in the permutation (must equal tensor.dim()) + + Returns: + Set of valid permutation tuples containing only positive indices [0, rank-1] + """ + if length > tensor.dim(): + return set() + + n = tensor.dim() + pool = list(range(n)) + + # Generate multiple valid permutations (only positive indices) + permutations: set[tuple[int, ...]] = set() + for _ in range(3): # Generate 3 different permutations for diversity + perm = tuple(rm.get_random().sample(pool, length)) + permutations.add(perm) + + return permutations + + def apply_tensor_contraints(op_name: str, index: int) -> list[object]: # Constraint to limit tensor size to < 4000 bytes with fully randomized shapes import random @@ -161,47 +189,31 @@ def random_size_constraint(deps: object, r: int, d: int) -> int: if index == 0: # condition tensor_constraints = [ cp.Dtype.In(lambda deps: [torch.bool]), - cp.Value.Ge(lambda deps, dtype, struct: -(2**4)), - cp.Value.Le(lambda deps, dtype, struct: 2**4), + cp.Value.Ge(lambda deps, dtype, struct: 0), + cp.Value.Le(lambda deps, dtype, struct: 1), cp.Rank.Ge(lambda deps: 1), cp.Size.Ge(lambda deps, r, d: 1), max_size_constraint, ] elif index == 1: # input tensor(a) tensor_constraints = [ - cp.Dtype.In( - lambda deps: [ - torch.int8, - torch.int16, - torch.uint8, - torch.uint16, - torch.int32, - torch.float32, - ] - ), + cp.Dtype.In(lambda deps: [torch.float32]), cp.Value.Ge(lambda deps, dtype, struct: -(2**4)), cp.Value.Le(lambda deps, dtype, struct: 2**4), cp.Rank.Ge(lambda deps: 1), cp.Size.Ge(lambda deps, r, d: 1), + cp.Size.In(lambda deps, r, d: fn.broadcast_with(deps[0].shape, r, d)), max_size_constraint, ] else: # input tensor(b) tensor_constraints = [ - cp.Dtype.In( - lambda deps: [ - torch.int8, - torch.int16, - torch.uint8, - torch.uint16, - torch.int32, - torch.float32, - ] - ), + cp.Dtype.In(lambda deps: [torch.float32]), cp.Dtype.Eq(lambda deps: deps[1].dtype), cp.Value.Ge(lambda deps, dtype, struct: -(2**4)), cp.Value.Le(lambda deps, dtype, struct: 2**4), cp.Rank.Ge(lambda deps: 1), cp.Size.Ge(lambda deps, r, d: 1), + cp.Size.In(lambda deps, r, d: fn.broadcast_with(fn.broadcasted_shape(deps[0].shape, deps[1].shape), r, d)), max_size_constraint, ] case "embedding.default": @@ -248,6 +260,9 @@ def random_size_constraint(deps: object, r: int, d: int) -> int: tensor_constraints.extend( [ cp.Dtype.In(lambda deps: [torch.float32, torch.int32]), + # Avoid NaN/Inf values that expose clamp NaN handling bugs + cp.Value.Ge(lambda deps, dtype, struct: -(2**4)), + cp.Value.Le(lambda deps, dtype, struct: 2**4), ] ) case "rsqrt.default": @@ -323,12 +338,15 @@ def random_size_constraint(deps: object, r: int, d: int) -> int: ] ) case "constant_pad_nd.default": - tensor_constraints.extend( - [ - cp.Dtype.In(lambda deps: [torch.float32]), - cp.Size.Le(lambda deps, r, d: 2**2), - ] - ) + tensor_constraints = [ + cp.Dtype.In(lambda deps: [torch.float32]), + cp.Value.Ge(lambda deps, dtype, struct: -(2**4)), + cp.Value.Le(lambda deps, dtype, struct: 2**4), + cp.Rank.Ge(lambda deps: 1), + cp.Rank.Le(lambda deps: 2), # Reduced from 3 to 2 (max 2D tensors) + cp.Size.Ge(lambda deps, r, d: 1), + cp.Size.Le(lambda deps, r, d: 3), # Max dimension size of 3 + ] case "avg_pool2d.default": tensor_constraints.extend( [ @@ -344,14 +362,23 @@ def random_size_constraint(deps: object, r: int, d: int) -> int: ] ) case "div.Tensor": - tensor_constraints.extend( - [ - cp.Value.Ne(lambda deps, dtype, struct: 0), - cp.Value.Le(lambda deps, dtype, struct: 2**3), - cp.Size.Le(lambda deps, r, d: 2**3), - cp.Rank.Le(lambda deps: 2**2), - ] - ) + if index == 1: # Only apply zero-prevention to divisor + tensor_constraints.extend( + [ + cp.Value.Ne(lambda deps, dtype, struct: 0), # Prevent division by zero + cp.Value.Le(lambda deps, dtype, struct: 2**3), + cp.Size.Le(lambda deps, r, d: 2**3), + cp.Rank.Le(lambda deps: 2**2), + ] + ) + else: + tensor_constraints.extend( + [ + cp.Value.Le(lambda deps, dtype, struct: 2**3), + cp.Size.Le(lambda deps, r, d: 2**3), + cp.Rank.Le(lambda deps: 2**2), + ] + ) case "pow.Tensor_Scalar": tensor_constraints.extend( [ @@ -373,6 +400,7 @@ def random_size_constraint(deps: object, r: int, d: int) -> int: cp.Dtype.In(lambda deps: [torch.int64, torch.int32, torch.float32]), cp.Value.Ge(lambda deps, dtype, struct: -(2**4)), cp.Value.Le(lambda deps, dtype, struct: 2**4), + cp.Value.Ne(lambda deps, dtype, struct: 0), # Prevent division by zero cp.Rank.Ge(lambda deps: 1), cp.Rank.Eq(lambda deps: deps[0].dim()), cp.Size.Eq(lambda deps, r, d: fn.safe_size(deps[0], d)), @@ -389,6 +417,12 @@ def random_size_constraint(deps: object, r: int, d: int) -> int: cp.Value.Le(lambda deps, dtype, struct: 2**2), cp.Size.Le(lambda deps, r, d: 2**3), ] + case "leaky_relu.default": + tensor_constraints.extend( + [ + cp.Dtype.In(lambda deps: [torch.float32]), + ] + ) case "_softmax.default": tensor_constraints.extend( [ @@ -396,6 +430,12 @@ def random_size_constraint(deps: object, r: int, d: int) -> int: cp.Size.Le(lambda deps, r, d: 2**2), ] ) + case "flip.default": + tensor_constraints.extend( + [ + cp.Dtype.In(lambda deps: [torch.float32]), + ] + ) case _: pass return tensor_constraints @@ -409,6 +449,7 @@ def apply_scalar_contraints(op_name: str) -> list[ScalarDtype]: | "mul.Scalar" | "div.Scalar" | "constant_pad_nd.default" + | "clamp.default" ): return [ScalarDtype.int] case "full.default": @@ -436,11 +477,40 @@ def facto_testcase_gen( # noqa: C901 cp.Size.Le(lambda deps, r, d: 2**2), ] ) - if in_spec.name == "max_val": # hardtanh + # Special handling for clamp.default to ensure min < max with sufficient gap (at least 2) and never None + if op_name == "clamp.default": + if in_spec.name == "min": + # min must always be provided (not None) and bounded, leave room for max + spec.inspec[index].constraints.extend( + [ + cp.Optional.Eq(lambda deps: False), # Never None + cp.Value.Ge(lambda deps, dtype: -(2**4)), + cp.Value.Le(lambda deps, dtype: 2**4 - 2), # Leave room for max (at least 2 units) + ] + ) + elif in_spec.name == "max": + # max must always be provided (not None), be >= min + 2 (sufficient gap), and bounded + spec.inspec[index].deps = [0, 1] # deps on input tensor and min + spec.inspec[index].constraints.extend( + [ + cp.Optional.Eq(lambda deps: False), # Never None + cp.Value.Ge(lambda deps, dtype: deps[1] + 2), # max >= min + 2 (sufficient gap) + cp.Value.Le(lambda deps, dtype: 2**4), + ] + ) + elif in_spec.name == "max_val": # hardtanh spec.inspec[index].deps = [0, 1] spec.inspec[index].constraints.extend( [cp.Value.Ge(lambda deps, _: deps[1])] ) + elif in_spec.name == "negative_slope" and op_name == "leaky_relu.default": + # For leaky_relu, negative_slope should be in typical range (0, 1] + spec.inspec[index].constraints.extend( + [ + cp.Value.Gt(lambda deps, dtype: 0), + cp.Value.Le(lambda deps, dtype: 1.0), + ] + ) else: spec.inspec[index].constraints.extend( [ @@ -465,12 +535,30 @@ def facto_testcase_gen( # noqa: C901 apply_tensor_contraints(op_name, index) ) elif in_spec.type.is_dim_list(): - spec.inspec[index].constraints.extend( - [ - cp.Length.Ge(lambda deps: 1), - cp.Optional.Eq(lambda deps: False), - ] - ) + # Special handling for permute_copy.default to ensure valid permutation + if op_name == "permute_copy.default": + spec.inspec[index].constraints.extend( + [ + cp.Length.Ge(lambda deps: 1), + cp.Length.Eq(lambda deps: deps[0].dim()), # Must be a complete permutation + cp.Optional.Eq(lambda deps: False), + # Generate valid permutations using only positive indices + # Cadence/Xtensa hardware kernels do not support negative dimension indices + cp.Value.Gen( + lambda deps, length: ( + _positive_valid_dim_list(deps[0], length), + fn.invalid_dim_list(deps[0], length), + ) + ), + ] + ) + else: + spec.inspec[index].constraints.extend( + [ + cp.Length.Ge(lambda deps: 1), + cp.Optional.Eq(lambda deps: False), + ] + ) elif in_spec.type.is_bool(): spec.inspec[index].constraints.extend( [