From 07dd9e1931f10633d11cfc6fa78413f71fe07cbe Mon Sep 17 00:00:00 2001
From: kaileiyx
Date: Mon, 24 Nov 2025 14:07:48 +0800
Subject: [PATCH 1/5] update skip list

---
 test/xpu/skip_list_common.py | 40 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py
index 9f810433bd..ea8ed29342 100644
--- a/test/xpu/skip_list_common.py
+++ b/test/xpu/skip_list_common.py
@@ -2,7 +2,11 @@
     "test_ops_xpu.py": (
         # Jiterator is only supported on CUDA and ROCm GPUs, none are available.
         # https://github.com/intel/torch-xpu-ops/issues/584
-        "_jiterator_",
+        "test_cow_input_jiterator_2inputs_2outputs_xpu_float32",
+        "test_cow_input_jiterator_4inputs_with_extra_args_xpu_float32",
+        "test_cow_input_jiterator_binary_return_by_ref_xpu_float32",
+        "test_cow_input_jiterator_binary_xpu_float32",
+        "test_cow_input_jiterator_unary_xpu_float32",
         # OPs not supported
         "test_errors_dot_xpu",
         "test_errors_vdot_xpu",
@@ -13,17 +17,23 @@
         "test_python_ref_executor__refs_pow_executor_aten_xpu_complex32",
         "test_python_ref_executor__refs_mul_executor_aten_xpu_complex32",
         # https://github.com/intel/torch-xpu-ops/issues/2254
-        "histogramdd",
-        "_vdot_",
-        "_dot_",
-        "_flash_attention_",
-        "_efficient_attention_",
+        "test_dtypes_histogramdd_xpu",
+        "test_cow_input_histogramdd_xpu_float32",
+        "test_operator_histogramdd_xpu_float32",
+        "test_view_replay_histogramdd_xpu_float32",
+        "test_neg_view_histogramdd_xpu_float64",
+        "test_comprehensive_vdot_xpu_complex128",
+        "test_comprehensive_vdot_xpu_complex64",
+        "test_quick_vdot_xpu_complex128",
+        "test_quick_vdot_xpu_complex64",
+        "test_variant_consistency_eager_nn_functional_scaled_dot_product_attention_xpu_float32",
+        "test_multiple_devices_linalg_multi_dot_xpu_int64",
+        "test_cow_input_linalg_multi_dot_xpu_float32",
+        "test_cow_input_nn_functional_scaled_dot_product_attention_xpu_float32",
     ),
     "test_binary_ufuncs_xpu.py": (
         "test_fmod_remainder_by_zero_integral_xpu_int64",  # zero division is an undefined behavior: different handles on different backends
         "test_div_rounding_numpy_xpu_float16",  # Calculation error. XPU implementation uses opmath type.
-        # AssertionError: Jiterator is only supported on CUDA and ROCm GPUs, none are available.
-        "_jiterator_",
     ),
     "test_scatter_gather_ops_xpu.py": (
         # AssertionError: Tensor-likes are not equal!
@@ -469,11 +479,6 @@
     "test_foreach_xpu.py": (
         # RuntimeError: Tried to instantiate dummy base class CUDAGraph
         "use_cuda_graph_True",
-        # randomly fails
-        "test_parity__foreach_div_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_div_fastpath_outplace_xpu_complex128",
-        "test_parity__foreach_addcdiv_fastpath_inplace_xpu_complex128",
-        "test_parity__foreach_addcdiv_fastpath_outplace_xpu_complex128",
     ),
     "nn/test_convolution_xpu.py": (
         # Summary: all of them are oneDNN related issues
@@ -764,7 +769,6 @@
         # AssertionError: Tensor-likes are not close! ; Exception: Tensor-likes are not close!
         "test_comprehensive_nn_functional_instance_norm_xpu_float64",
         # RuntimeError: Difference from float64 is larger with decomposition nll_loss_forward.default than original on output 0.
-        "test_comprehensive_nn_functional_nll_loss_xpu_float16",
         "test_comprehensive_nn_functional_pad_reflect_xpu_bfloat16",
         # NotImplementedError: Could not run 'aten::_flash_attention_forward' with arguments from the 'CPU' backend.
"test_comprehensive_torch_ops_aten__flash_attention_forward_xpu_float16", @@ -792,14 +796,6 @@ # RuntimeError: I got this output for HasDecompTest.test_aten_core_operators: "test_aten_core_operators", "test_has_decomposition", - # AssertionError: Tensor-likes are not close! - "test_comprehensive_diff_xpu_complex128", - "test_comprehensive_ormqr_xpu_complex128", - "test_quick_var_mean_xpu_float64", - "test_comprehensive_diff_xpu_complex64", - "test_comprehensive_ormqr_xpu_complex64", - "test_quick_mean_xpu_complex128", - "test_comprehensive_grid_sampler_2d_xpu_bfloat16", ), "functorch/test_ops_xpu.py": None, "test_sparse_xpu.py": None, From 227137386cc983053e47d601fa3afa948133f171 Mon Sep 17 00:00:00 2001 From: kaileiyx Date: Mon, 24 Nov 2025 14:28:40 +0800 Subject: [PATCH 2/5] update --- test/xpu/skip_list_common.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py index ea8ed29342..e4cdfa66f2 100644 --- a/test/xpu/skip_list_common.py +++ b/test/xpu/skip_list_common.py @@ -22,10 +22,6 @@ "test_operator_histogramdd_xpu_float32", "test_view_replay_histogramdd_xpu_float32", "test_neg_view_histogramdd_xpu_float64", - "test_comprehensive_vdot_xpu_complex128", - "test_comprehensive_vdot_xpu_complex64", - "test_quick_vdot_xpu_complex128", - "test_quick_vdot_xpu_complex64", "test_variant_consistency_eager_nn_functional_scaled_dot_product_attention_xpu_float32", "test_multiple_devices_linalg_multi_dot_xpu_int64", "test_cow_input_linalg_multi_dot_xpu_float32", From eaf359e674c682f5d8936eb40fd72d76aaa025ec Mon Sep 17 00:00:00 2001 From: kaileiyx Date: Tue, 25 Nov 2025 09:58:29 +0800 Subject: [PATCH 3/5] update --- test/xpu/skip_list_common.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py index e4cdfa66f2..1cc5ed3549 100644 --- a/test/xpu/skip_list_common.py +++ b/test/xpu/skip_list_common.py @@ -2,11 +2,7 @@ "test_ops_xpu.py": ( # Jiterator is only supported on CUDA and ROCm GPUs, none are available. 
         # https://github.com/intel/torch-xpu-ops/issues/584
-        "test_cow_input_jiterator_2inputs_2outputs_xpu_float32",
-        "test_cow_input_jiterator_4inputs_with_extra_args_xpu_float32",
-        "test_cow_input_jiterator_binary_return_by_ref_xpu_float32",
-        "test_cow_input_jiterator_binary_xpu_float32",
-        "test_cow_input_jiterator_unary_xpu_float32",
+        "_jiterator_"
         # OPs not supported
         "test_errors_dot_xpu",
         "test_errors_vdot_xpu",
@@ -21,15 +17,39 @@
         "test_cow_input_histogramdd_xpu_float32",
         "test_operator_histogramdd_xpu_float32",
         "test_view_replay_histogramdd_xpu_float32",
+        "test_noncontiguous_samples_histogramdd_xpu_float32",
+        "test_out_histogramdd_xpu_float32",
+        "test_variant_consistency_eager_histogramdd_xpu_float32",
+        "test_out_warning_histogramdd_xpu",
         "test_neg_view_histogramdd_xpu_float64",
+        "test_python_ref_torch_fallback__refs_vdot_xpu_complex128",
+        "test_python_ref_torch_fallback__refs_vdot_xpu_complex64",
+        "test_python_ref_executor__refs_vdot_executor_aten_xpu_complex128",
+        "test_python_ref_executor__refs_vdot_executor_aten_xpu_complex64",
+        "test_dtypes_vdot_xpu",
         "test_variant_consistency_eager_nn_functional_scaled_dot_product_attention_xpu_float32",
         "test_multiple_devices_linalg_multi_dot_xpu_int64",
         "test_cow_input_linalg_multi_dot_xpu_float32",
         "test_cow_input_nn_functional_scaled_dot_product_attention_xpu_float32",
+        "test_dtypes_dot_xpu",
+        "test_dtypes_linalg_multi_dot_xpu",
+        "test_dtypes_torch_ops_aten__efficient_attention_forward_xpu",
+        "test_fake_crossref_backward_no_amp_torch_ops_aten__efficient_attention_forward_xpu_float32",
+        "test_cow_input_torch_ops_aten__efficient_attention_forward_xpu_float32",
+        "test_out_warning_torch_ops_aten__efficient_attention_forward_xpu",
+        "test_variant_consistency_eager_torch_ops_aten__efficient_attention_forward_xpu_float32",
+        "test_fake_crossref_backward_amp_torch_ops_aten__efficient_attention_forward_xpu_float32",
+        "test_out_torch_ops_aten__efficient_attention_forward_xpu_float32",
+        "test_view_replay_torch_ops_aten__efficient_attention_forward_xpu_float32",
+        "test_out_torch_ops_aten__flash_attention_forward_xpu_float16",
+        "test_out_warning_torch_ops_aten__flash_attention_forward_xpu",
+        "test_dtypes_torch_ops_aten__flash_attention_forward_xpu",
     ),
     "test_binary_ufuncs_xpu.py": (
         "test_fmod_remainder_by_zero_integral_xpu_int64",  # zero division is an undefined behavior: different handles on different backends
         "test_div_rounding_numpy_xpu_float16",  # Calculation error. XPU implementation uses opmath type.
+        # AssertionError: Jiterator is only supported on CUDA and ROCm GPUs, none are available.
+        "_jiterator_",
     ),
     "test_scatter_gather_ops_xpu.py": (
         # AssertionError: Tensor-likes are not equal!

From 7b27cc3437c74e95df08bc4aff90847e3fd288d3 Mon Sep 17 00:00:00 2001
From: kaileiyx
Date: Tue, 25 Nov 2025 10:00:57 +0800
Subject: [PATCH 4/5] update

---
 test/xpu/skip_list_common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py
index 1cc5ed3549..58ab2813d3 100644
--- a/test/xpu/skip_list_common.py
+++ b/test/xpu/skip_list_common.py
@@ -2,7 +2,7 @@
     "test_ops_xpu.py": (
         # Jiterator is only supported on CUDA and ROCm GPUs, none are available.
         # https://github.com/intel/torch-xpu-ops/issues/584
-        "_jiterator_"
+        "_jiterator_",
         # OPs not supported
         "test_errors_dot_xpu",
         "test_errors_vdot_xpu",

From 14ef4bec3d92ef9c5c741a13e627101c4bf01c21 Mon Sep 17 00:00:00 2001
From: kaileiyx
Date: Thu, 27 Nov 2025 15:06:31 +0800
Subject: [PATCH 5/5] remove 4 random failures

---
 .github/scripts/ut_result_check.sh | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.github/scripts/ut_result_check.sh b/.github/scripts/ut_result_check.sh
index a94007da11..baba794944 100644
--- a/.github/scripts/ut_result_check.sh
+++ b/.github/scripts/ut_result_check.sh
@@ -21,10 +21,6 @@ declare -A EXPECTED_CASES=(
 # Tests that are known to randomly pass and should be ignored when detecting new passes
 # These are typically flaky tests that don't indicate real improvements
 IGNORE_TESTS=(
-    "test_parity__foreach_div_fastpath_inplace_xpu_complex128"
-    "test_parity__foreach_div_fastpath_outplace_xpu_complex128"
-    "test_parity__foreach_addcdiv_fastpath_inplace_xpu_complex128"
-    "test_parity__foreach_addcdiv_fastpath_outplace_xpu_complex128"
     "test_python_ref__refs_log2_xpu_complex128"
     "_jiterator_" # Pattern to match all jiterator tests
)
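
Note on how these skip entries are consumed (commentary, not part of the patches above): both skip_list_common.py and the IGNORE_TESTS array appear to treat each entry as a plain substring of the full test name, which is why the short pattern "_jiterator_" can stand in for the five fully qualified jiterator cases and why ut_result_check.sh calls it a "Pattern to match all jiterator tests". Below is a minimal Python sketch of that assumed matching behavior; the should_skip helper and the two-entry skip_dict are illustrative only, not the project's actual harness code.

# Hypothetical sketch: skip entries are matched as substrings of the test name.
skip_dict = {
    "test_ops_xpu.py": (
        "_jiterator_",                  # short pattern: covers every jiterator variant
        "test_dtypes_histogramdd_xpu",  # fully qualified name: effectively pins one case
    ),
}

def should_skip(test_file: str, test_name: str) -> bool:
    """Return True if any skip pattern registered for this file is a substring of the test name."""
    return any(pattern in test_name for pattern in skip_dict.get(test_file, ()))

# Example: both jiterator cases match the "_jiterator_" pattern; an unrelated test does not.
assert should_skip("test_ops_xpu.py", "test_cow_input_jiterator_binary_xpu_float32")
assert should_skip("test_ops_xpu.py", "test_cow_input_jiterator_unary_xpu_float32")
assert not should_skip("test_ops_xpu.py", "test_out_add_xpu_float32")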