
Commit 7f71b1b

hejiang0116 and Orbax Authors authored and committed

Internal change

PiperOrigin-RevId: 834940834

1 parent d966ddf · commit 7f71b1b

File tree: 9 files changed, +165 −44 lines

export/orbax/export/constants.py
Lines changed: 9 additions & 0 deletions

```diff
@@ -97,9 +97,18 @@ class ExportModelType(enum.Enum):
 # Mesh for the model.
 JAX_MESH = 'jax_mesh'
 
+# TODO: b/459991985 - Remove this flag and use PERSIST_XLA_FLAGS instead.
 # Whether to strip XLA flags from the model.
 STRIP_XLA_FLAGS = 'strip_xla_flags'
 
+# Whether to persist XLA flags in the model.
+PERSIST_XLA_FLAGS = 'persist_xla_flags'
+
+# Whether to enable bf16 optimization for the model.
+# TODO: b/422170690 - (1) Apply this flag to the pre/post processors. (2) Add
+# filter flags once the flag is applied to the pre/post processors.
+ENABLE_BF16_OPTIMIZATION = 'enable_bf16_optimization'
+
 ################################################################################
 # Proto field names
 ################################################################################
```
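The new `ENABLE_BF16_OPTIMIZATION` key is consumed from `jax2obm_kwargs` in `obm_module.py` (below). A minimal sketch of passing it through, mirroring the test added in this commit; `_linear` and the shapes are illustrative stand-ins, not part of this diff:

```python
import jax
import jax.numpy as jnp
from orbax.export import constants
from orbax.export.modules import obm_module


def _linear(params, x):  # illustrative apply_fn, not from this commit
  return x @ params['w'] + params['b']


module = obm_module.ObmModule(
    params={
        'w': jax.ShapeDtypeStruct((2, 2), jnp.float32),
        'b': jax.ShapeDtypeStruct((2,), jnp.float32),
    },
    apply_fn=_linear,
    input_polymorphic_shape={constants.DEFAULT_METHOD_KEY: 'b, ...'},
    jax2obm_kwargs={constants.ENABLE_BF16_OPTIMIZATION: True},
)
# With the flag set, params and apply_fn are converted to bfloat16 via
# utils.to_bfloat16 (see obm_module.py and utils.py below).
```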

export/orbax/export/data_processors/tf_data_processor_test.py
Lines changed: 22 additions & 24 deletions

```diff
@@ -56,26 +56,23 @@ def test_output_signature_raises_error_without_calling_prepare(self):
       _ = processor.output_signature
 
   def test_prepare_fails_with_multiple_calls(self):
-    processor = tf_data_processor.TfDataProcessor(lambda x: x)
+    processor = tf_data_processor.TfDataProcessor(lambda x: x, name='add')
     processor.prepare(
-        'add',
-        input_signature=(tf.TensorSpec([None, 3], tf.float32),),
+        (tf.TensorSpec([None, 3], tf.float32),),
     )
     with self.assertRaisesWithLiteralMatch(
         RuntimeError, '`prepare()` can only be called once.'
     ):
       processor.prepare(
-          'add',
-          input_signature=(tf.TensorSpec([None, 3], tf.float32),),
+          (tf.TensorSpec([None, 3], tf.float32),),
       )
 
   def test_prepare_succeeds(self):
     processor = tf_data_processor.TfDataProcessor(
-        tf.function(lambda x, y: x + y)
+        tf.function(lambda x, y: x + y), name='add'
     )
     processor.prepare(
-        'add',
-        input_signature=(
+        (
             tf.TensorSpec([None, 3], tf.float64),
             tf.TensorSpec([None, 3], tf.float64),
         ),
@@ -107,10 +104,11 @@ def test_prepare_polymorphic_function_with_default_input_signature(self):
     def preprocessor_callable(x, y):
       return x + y
 
-    processor = tf_data_processor.TfDataProcessor(preprocessor_callable)
+    processor = tf_data_processor.TfDataProcessor(
+        preprocessor_callable, name='add'
+    )
     processor.prepare(
-        'add',
-        input_signature=(
+        (
             tf.TensorSpec([None, 3], tf.float32),
             tf.TensorSpec([None, 3], tf.float32),
         ),
@@ -136,25 +134,27 @@ def test_suppress_x64_output(self):
     processor = tf_data_processor.TfDataProcessor(
         tf.function(
             lambda x, y: tf.cast(x, tf.float64) + tf.cast(y, tf.float64)
-        )
+        ),
+        name='add_f64',
     )
     input_signature = (
         tf.TensorSpec([None, 3], tf.float32),
         tf.TensorSpec([None, 3], tf.float32),
     )
 
     # With suppress_x64_output=True, f64 output is suppressed to f32.
-    processor.prepare('add_f64', input_signature, suppress_x64_output=True)
+    processor.prepare(input_signature, suppress_x64_output=True)
     self.assertEqual(
         processor.output_signature,
         obm.ShloTensorSpec(shape=(None, 3), dtype=obm.ShloDType.f32),
     )
 
   def test_convert_to_bfloat16(self):
-    processor = tf_data_processor.TfDataProcessor(lambda x: 0.5 + x)
+    processor = tf_data_processor.TfDataProcessor(
+        lambda x: 0.5 + x, name='preprocessor'
+    )
     processor.prepare(
-        'preprocessor',
-        input_signature=(tf.TensorSpec((), tf.float32)),
+        (tf.TensorSpec((), tf.float32)),
         bfloat16_options=converter_options_v2_pb2.ConverterOptionsV2(
             bfloat16_optimization_options=converter_options_v2_pb2.BFloat16OptimizationOptions(
                 scope=converter_options_v2_pb2.BFloat16OptimizationOptions.ALL,
@@ -168,15 +168,16 @@ def test_convert_to_bfloat16(self):
     )
 
   def test_bfloat16_convert_error(self):
-    processor = tf_data_processor.TfDataProcessor(lambda x: 0.5 + x)
+    processor = tf_data_processor.TfDataProcessor(
+        lambda x: 0.5 + x, name='preprocessor'
+    )
     with self.assertRaisesRegex(
         google_error.StatusNotOk,
         'Found bfloat16 ops in the model. The model may have been converted'
         ' before. It should not be converted again.',
     ):
       processor.prepare(
-          'preprocessor',
-          input_signature=(tf.TensorSpec((), tf.bfloat16)),
+          (tf.TensorSpec((), tf.bfloat16)),
           bfloat16_options=converter_options_v2_pb2.ConverterOptionsV2(
               bfloat16_optimization_options=converter_options_v2_pb2.BFloat16OptimizationOptions(
                   scope=converter_options_v2_pb2.BFloat16OptimizationOptions.ALL,
@@ -185,12 +186,9 @@ def test_bfloat16_convert_error(self):
       )
 
   def test_prepare_with_shlo_bf16_inputs(self):
-    processor = tf_data_processor.TfDataProcessor(lambda x: x)
+    processor = tf_data_processor.TfDataProcessor(lambda x: x, name='identity')
     processor.prepare(
-        'identity',
-        input_signature=(
-            obm.ShloTensorSpec(shape=(1,), dtype=obm.ShloDType.bf16),
-        ),
+        (obm.ShloTensorSpec(shape=(1,), dtype=obm.ShloDType.bf16),),
     )
     self.assertEqual(
         processor.concrete_function.structured_input_signature[0][0].dtype,
```
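These tests encode an API migration for `TfDataProcessor`: the processor name moves from `prepare()` to the constructor, and `prepare()` now takes the input signature as its first positional argument. A hedged before/after sketch of the call pattern:

```python
import tensorflow as tf
from orbax.export.data_processors import tf_data_processor

# Old call pattern (removed by this commit):
#   processor = tf_data_processor.TfDataProcessor(lambda x: x)
#   processor.prepare(
#       'add', input_signature=(tf.TensorSpec([None, 3], tf.float32),)
#   )

# New call pattern:
processor = tf_data_processor.TfDataProcessor(lambda x: x, name='add')
processor.prepare((tf.TensorSpec([None, 3], tf.float32),))
```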

export/orbax/export/jax_module.py
Lines changed: 10 additions & 0 deletions

```diff
@@ -197,6 +197,16 @@ def jax2tf_kwargs_map(self) -> Mapping[str, Any]:
         tensorflow_module.TensorFlowModule, self._export_module
     ).jax2tf_kwargs_map
 
+  @property
+  def jax2obm_kwargs(self) -> Mapping[str, Any]:
+    """Returns the jax2obm_kwargs."""
+    if self._export_version == constants.ExportModelType.TF_SAVEDMODEL:
+      raise TypeError(
+          'jax2obm_kwargs is not implemented for export version'
+          ' ExportModelType.TF_SAVEDMODEL.'
+      )
+    return cast(obm_module.ObmModule, self._export_module).jax2obm_kwargs
+
   @property
   def input_polymorphic_shape_map(self) -> Mapping[str, PyTree]:
     """Returns the polymorphic shapes."""
```

export/orbax/export/modules/obm_module.py
Lines changed: 28 additions & 20 deletions

```diff
@@ -73,34 +73,43 @@ def __init__(
     )
 
     # It is possible for jax2obm_kwargs to be None if the key is present.
-    if not jax2obm_kwargs:
-      jax2obm_kwargs = {}
 
+    self._jax2obm_kwargs = jax2obm_kwargs if jax2obm_kwargs else {}
+
+    enable_bf16_optimization = self.jax2obm_kwargs.get(
+        constants.ENABLE_BF16_OPTIMIZATION, False
+    )
+
+    if enable_bf16_optimization:
+      mapped_apply_fn = utils.to_bfloat16(apply_fn)
+      self._params_args_spec = utils.to_bfloat16(params)
+    else:
+      mapped_apply_fn = apply_fn
+      self._params_args_spec = params
     (
         self._apply_fn_map,
         self.input_polymorphic_shape_map,
         self.input_polymorphic_shape_symbol_values_map,
     ) = self._normalize_apply_fn_map(
-        apply_fn,
+        mapped_apply_fn,
         input_polymorphic_shape,
         input_polymorphic_shape_symbol_values,
     )
 
-    self._jax_mesh = jax2obm_kwargs.get(constants.JAX_MESH, None)
-    self._strip_xla_flags = jax2obm_kwargs.get(constants.STRIP_XLA_FLAGS, False)
-
-    self.polymorphic_constraints = self._maybe_set_polymorphic_constraints(
-        jax2obm_kwargs
+    self._jax_mesh = self.jax2obm_kwargs.get(constants.JAX_MESH, None)
+    self._strip_xla_flags = self.jax2obm_kwargs.get(
+        constants.STRIP_XLA_FLAGS, False
     )
+
+    self.polymorphic_constraints = self._maybe_set_polymorphic_constraints()
     self._native_serialization_platforms = utils.get_lowering_platforms(
-        jax2obm_kwargs
+        self.jax2obm_kwargs
    )
-    self._params_args_spec = params
 
     self._checkpoint_path: str = None
     # Set the Orbax checkpoint path if provided in the jax2obm_kwargs.
-    self._maybe_set_orbax_checkpoint_path(jax2obm_kwargs)
-    self._load_all_checkpoint_weights = jax2obm_kwargs.get(
+    self._maybe_set_orbax_checkpoint_path(self.jax2obm_kwargs)
+    self._load_all_checkpoint_weights = self.jax2obm_kwargs.get(
         constants.LOAD_ALL_CHECKPOINT_WEIGHTS, False
     )
@@ -203,15 +212,9 @@ def _maybe_set_orbax_checkpoint_path(self, jax2obm_kwargs):
         else constants.DEFAULT_WEIGHTS_NAME
     )
 
-  def _maybe_set_polymorphic_constraints(
-      self, jax2obm_kwargs
-  ) -> Mapping[str, Sequence[Any]]:
+  def _maybe_set_polymorphic_constraints(self) -> Mapping[str, Sequence[Any]]:
     """Sets the polymorphic constraints for the model.
 
-    Args:
-      jax2obm_kwargs: A dictionary of kwargs to pass to the jax2obm conversion
-        library.
-
     Returns:
       A mapping of function name to polymorphic constraints.
@@ -221,7 +224,7 @@ def _maybe_set_polymorphic_constraints(
       size of the apply_fn_map or if a key in apply_fn_map is not found in
       polymorphic_constraints.
     """
-    polymorphic_constraints = jax2obm_kwargs.get(
+    polymorphic_constraints = self.jax2obm_kwargs.get(
         constants.POLYMORPHIC_CONSTRAINTS, None
     )
     if not isinstance(polymorphic_constraints, Mapping):
@@ -300,3 +303,8 @@ def methods(self) -> Mapping[str, Callable[..., Any]]:
   def jax_methods(self) -> Mapping[str, Callable[..., Any]]:
     """Named methods in JAX context for validation."""
     raise NotImplementedError('apply_fn_map is not implemented for ObmModule.')
+
+  @property
+  def jax2obm_kwargs(self) -> Mapping[str, Any]:
+    """Returns the jax2obm_kwargs."""
+    return self._jax2obm_kwargs
```
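Because `jax2obm_kwargs` is normalized to `{}` when `None` and then retained, the new read-only property makes the flags queryable after construction. A minimal sketch, reusing the illustrative `_linear` and imports from the constants.py example above:

```python
module = obm_module.ObmModule(
    params={
        'w': jax.ShapeDtypeStruct((2, 2), jnp.float32),
        'b': jax.ShapeDtypeStruct((2,), jnp.float32),
    },
    apply_fn=_linear,
    input_polymorphic_shape={constants.DEFAULT_METHOD_KEY: 'b, ...'},
    jax2obm_kwargs=None,  # normalized to {} by __init__
)
assert module.jax2obm_kwargs == {}
assert not module.jax2obm_kwargs.get(constants.ENABLE_BF16_OPTIMIZATION, False)
```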

export/orbax/export/modules/obm_module_test.py
Lines changed: 26 additions & 0 deletions

```diff
@@ -357,6 +357,32 @@ def test_obm_module_multiple_apply_fns(
         jax2obm_kwargs=jax2obm_kwargs,
     )
 
+  @parameterized.named_parameters(
+      {'testcase_name': 'enable_bf16', 'enable_bf16_optimization': True},
+      {'testcase_name': 'disable_bf16', 'enable_bf16_optimization': False},
+  )
+  def test_obm_module_bfloat16_conversion(self, enable_bf16_optimization):
+    params_spec = {
+        'w': jax.ShapeDtypeStruct((2, 2), jnp.float32),
+        'b': jax.ShapeDtypeStruct((2,), jnp.float32),
+    }
+    input_spec = {constants.DEFAULT_METHOD_KEY: 'b, ...'}
+
+    module = obm_module.ObmModule(
+        params=params_spec,
+        apply_fn=_linear,
+        input_polymorphic_shape=input_spec,
+        jax2obm_kwargs={
+            constants.ENABLE_BF16_OPTIMIZATION: enable_bf16_optimization
+        },
+    )
+
+    expected_dtype = jnp.bfloat16 if enable_bf16_optimization else jnp.float32
+    with self.subTest('test_weights_w_dtype'):
+      self.assertEqual(module.model_params['w'].dtype, expected_dtype)
+    with self.subTest('test_weights_b_dtype'):
+      self.assertEqual(module.model_params['b'].dtype, expected_dtype)
+
 
 if __name__ == '__main__':
   absltest.main()
```

export/orbax/export/oex_orchestration.py
Lines changed: 4 additions & 0 deletions

```diff
@@ -14,7 +14,11 @@
 
 """Pipeline: pre-processor + model-function + post-processor."""
 
+import dataclasses
 from typing import Any, Dict, List, Sequence, Tuple, TypeVar
 
 from absl import logging
 import jax
+import jaxtyping
+from orbax.export.data_processors import data_processor_base
+from orbax.export.modules import obm_module
```

export/orbax/export/serving_config.py
Lines changed: 27 additions & 0 deletions

```diff
@@ -21,6 +21,7 @@
 import jax
 import jaxtyping
 from orbax.export.data_processors import data_processor_base
+from orbax.export.data_processors import tf_data_processor
 import tensorflow as tf
 
 
@@ -108,6 +109,32 @@ def get_signature_keys(self) -> Sequence[str]:
     else:
       return self.signature_key
 
+  def get_preprocessors(self) -> Sequence[data_processor_base.DataProcessor]:
+    """Returns the preprocessors for this serving config."""
+    if self.preprocessors:
+      return self.preprocessors
+    elif self.tf_preprocessor:
+      return [
+          tf_data_processor.TfDataProcessor(
+              self.tf_preprocessor,
+          )
+      ]
+    else:
+      return []
+
+  def get_postprocessors(self) -> Sequence[data_processor_base.DataProcessor]:
+    """Returns the postprocessors for this serving config."""
+    if self.postprocessors:
+      return self.postprocessors
+    elif self.tf_postprocessor:
+      return [
+          tf_data_processor.TfDataProcessor(
+              self.tf_postprocessor,
+          )
+      ]
+    else:
+      return []
+
   def get_input_signature(self, required=True) -> Any:
     """Gets the input signature from the explicit one or tf_preprocessor."""
     input_signature = self.input_signature
```
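A hedged sketch of the new accessors: with only a legacy `tf_preprocessor` configured, `get_preprocessors()` wraps it in a `TfDataProcessor`; explicit `preprocessors` are returned as-is, and an empty list otherwise. Field names follow the `ServingConfig` dataclass in this file; the spec values are illustrative:

```python
import tensorflow as tf
from orbax.export import serving_config as osc

sc = osc.ServingConfig(
    signature_key='serving_default',
    input_signature=[tf.TensorSpec([None, 3], tf.float32)],
    tf_preprocessor=lambda x: x * 2.0,  # legacy-style preprocessor
)
preprocessors = sc.get_preprocessors()  # [TfDataProcessor(tf_preprocessor)]
assert sc.get_postprocessors() == []    # nothing configured on the output side
```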

export/orbax/export/utils.py
Lines changed: 38 additions & 0 deletions

```diff
@@ -18,6 +18,7 @@
 import dataclasses
 import functools
 import inspect
+import jax.numpy as jnp
 import os
 from typing import Any, Callable, List, Optional, Tuple, Union
 
@@ -532,3 +533,40 @@ def get_lowering_platforms(
     )
 
   return native_serialization_platforms
+
+
+def to_bfloat16(x: Any) -> Any:
+  """Helper to convert leaves of a pytree to bfloat16.
+
+  It handles `float`, `jax.ShapeDtypeStruct`, and other array-like objects with
+  a floating point `dtype`.
+
+  Args:
+    x: The input pytree to convert.
+
+  Returns:
+    The input `x` with floating point values converted to `jnp.bfloat16`.
+  """
+
+  def _to_bfloat16_leaf(x: Any) -> Any:
+    if isinstance(x, jax.ShapeDtypeStruct) and jnp.issubdtype(
+        x.dtype, jnp.floating
+    ):
+      return jax.ShapeDtypeStruct(
+          x.shape,
+          jnp.bfloat16,
+          sharding=x.sharding,
+      )
+    if isinstance(x, jax.ShapeDtypeStruct):
+      return x
+    if hasattr(x, 'dtype') and jnp.issubdtype(x.dtype, jnp.floating):
+      return x.astype(jnp.bfloat16)
+    if isinstance(x, float):
+      return jnp.bfloat16(x)
+    return x
+
+  flattened_x, treedef = jax.tree_util.tree_flatten(x)
+  flattened_y = [
+      jax.tree_util.tree_map(_to_bfloat16_leaf, y) for y in flattened_x
+  ]
+  return jax.tree_util.tree_unflatten(treedef, flattened_y)
```
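A hedged usage sketch of the new helper on a mixed pytree, assuming `orbax.export.utils` is importable as below: floating-point leaves (arrays, `jax.ShapeDtypeStruct`s, plain Python floats) become bfloat16, while non-float leaves pass through unchanged:

```python
import jax
import jax.numpy as jnp
from orbax.export import utils

tree = {
    'w': jnp.ones((2, 2), jnp.float32),               # array -> bf16
    'spec': jax.ShapeDtypeStruct((4,), jnp.float32),  # spec -> bf16 spec
    'step': jnp.array(3, jnp.int32),                  # non-float: unchanged
    'lr': 1e-3,                                       # float -> jnp.bfloat16
}
out = utils.to_bfloat16(tree)
assert out['w'].dtype == jnp.bfloat16
assert out['spec'].dtype == jnp.bfloat16
assert out['step'].dtype == jnp.int32
```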

model/orbax/experimental/model/cli/README.md
Lines changed: 1 addition & 0 deletions

```diff
@@ -2,6 +2,7 @@
 
 A command-line tool for inspecting Orbax models.
 
+
 ## Examples
 
 To inspect the model:
```
