make torch use flash-attn #2189

Closed · wants to merge 2 commits
21 changes: 17 additions & 4 deletions keras_hub/src/utils/keras_utils.py
@@ -56,10 +56,9 @@ def standardize_data_format(data_format):
 
 
 def has_flash_attention_support():
-    if (
-        hasattr(keras.config, "is_flash_attention_enabled")
-        and keras.config.backend() == "jax"
-    ):
+    if not hasattr(keras.config, "is_flash_attention_enabled"):
+        return False
+    if keras.config.backend() == "jax":
         try:
             from jax.nn import dot_product_attention as dot_product_attention
         except ImportError:
@@ -70,6 +69,20 @@ def has_flash_attention_support():
             )
             return False
         return True
+    elif keras.config.backend() == "torch":
+        try:
+            from torch.backends.cuda import SDPAParams  # noqa: F401
+            from torch.backends.cuda import (
+                can_use_flash_attention,  # noqa: F401
+            )
+        except ImportError:
+            logging.warning(
+                "Flash attention is not supported in your current PyTorch "
+                "version. Please update it by following the official guide:"
+                "https://pytorch.org/get-started/locally/"
+            )
+            return False
+        return True
     else:
         return False

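Reviewer note: the new torch branch only confirms that torch.backends.cuda.SDPAParams and can_use_flash_attention are importable; it does not guarantee that PyTorch will actually dispatch to its flash attention kernel for a given device, dtype, and shape. The sketch below is one way to probe that at runtime for concrete tensors. It is not part of this PR, and it deliberately uses torch.nn.attention.sdpa_kernel (available in PyTorch 2.3+) instead of the SDPAParams/can_use_flash_attention pair imported above, because the SDPAParams constructor signature has changed across PyTorch releases; flash_attention_fires is an illustrative name.

import torch
from torch.nn.attention import SDPBackend, sdpa_kernel


def flash_attention_fires(q, k, v):
    """Best-effort check that PyTorch can run flash attention for q/k/v."""
    if not torch.cuda.is_available():
        return False
    try:
        # Restrict scaled_dot_product_attention to the flash attention backend;
        # if that kernel cannot handle these inputs, PyTorch raises RuntimeError.
        with sdpa_kernel(SDPBackend.FLASH_ATTENTION):
            torch.nn.functional.scaled_dot_product_attention(q, k, v)
        return True
    except RuntimeError:
        return False


if torch.cuda.is_available():
    # Flash attention expects half precision and a (batch, heads, seq_len, head_dim) layout.
    q = k = v = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)
    print(flash_attention_fires(q, k, v))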