Skip to content

Commit 04b12d3

Browse files
No public description
PiperOrigin-RevId: 879570384
1 parent 1b152ff commit 04b12d3

6 files changed

Lines changed: 90 additions & 10 deletions

File tree

official/modeling/activations/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from official.modeling.activations.mish import mish
1818
from official.modeling.activations.relu import relu6
1919
from official.modeling.activations.sigmoid import hard_sigmoid
20+
from official.modeling.activations.squared_relu import squared_relu
2021
from official.modeling.activations.swish import hard_swish
2122
from official.modeling.activations.swish import identity
2223
from official.modeling.activations.swish import simple_swish
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright 2026 The TensorFlow Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Customized Squared ReLU activation."""
16+
17+
import tensorflow as tf, tf_keras
18+
19+
20+
@tf_keras.utils.register_keras_serializable(package='Text')
def squared_relu(features: tf.Tensor) -> tf.Tensor:
  """Computes the Squared ReLU activation function.

  Squared ReLU is the element-wise result of applying ReLU and then
  squaring: `relu(x) * relu(x)`.

  Args:
    features: A `Tensor` representing preactivation values.

  Returns:
    The activation value.
  """
  # Accept plain Python/NumPy inputs by promoting them to a Tensor first.
  rectified = tf.nn.relu(tf.convert_to_tensor(features))
  # x * x is numerically identical to tf.math.square(x) for real tensors.
  return rectified * rectified
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Copyright 2026 The TensorFlow Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Tests for the customized Squared ReLU activation."""
16+
17+
import numpy as np
18+
import tensorflow as tf, tf_keras
19+
20+
from official.modeling import activations
21+
22+
23+
class CustomizedSquaredReluTest(tf.test.TestCase):
  """Checks the custom activation against a tf.nn-based reference."""

  def _squared_relu_nn(self, x):
    # Reference implementation built directly from TF primitives.
    rectified = tf.nn.relu(np.float32(x))
    return tf.math.square(rectified)

  def test_squared_relu(self):
    features = [[0.25, 0, -0.25], [-1, -2, 3]]
    actual = activations.squared_relu(features)
    expected = self._squared_relu_nn(features)
    self.assertAllClose(actual, expected)
34+
35+
36+
# Run all test cases in this module when executed directly as a script.
if __name__ == '__main__':
  tf.test.main()

official/modeling/tf_utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ def get_activation(identifier, use_keras_layer=False, **kwargs):
120120
"hard_sigmoid": activations.hard_sigmoid,
121121
"mish": activations.mish,
122122
"gelu": functools.partial(tf.nn.gelu, **kwargs),
123+
"squared_relu": activations.squared_relu,
123124
}
124125
if identifier in keras_layer_allowlist:
125126
return tf_keras.layers.Activation(keras_layer_allowlist[identifier])
@@ -131,6 +132,7 @@ def get_activation(identifier, use_keras_layer=False, **kwargs):
131132
"hard_sigmoid": activations.hard_sigmoid,
132133
"identity": activations.identity,
133134
"mish": activations.mish,
135+
"squared_relu": activations.squared_relu,
134136
}
135137
if identifier in name_to_fn:
136138
return tf_keras.activations.get(name_to_fn[identifier])

official/nlp/modeling/layers/mobile_bert_layers.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
import tensorflow as tf, tf_keras
1717

1818
from official.modeling import tf_utils
19-
2019
from official.nlp.modeling.layers import on_device_embedding
2120
from official.nlp.modeling.layers import position_embedding
2221

@@ -288,11 +287,12 @@ def __init__(self,
288287
layer_name = layer_prefix + '/intermediate_dense'
289288
intermediate_layer = tf_keras.layers.EinsumDense(
290289
'abc,cd->abd',
291-
activation=self.intermediate_act_fn,
290+
activation=tf_utils.get_activation(self.intermediate_act_fn),
292291
output_shape=[None, self.intermediate_size],
293292
bias_axes='d',
294293
kernel_initializer=tf_utils.clone_initializer(self.initializer),
295-
name=layer_name)
294+
name=layer_name,
295+
)
296296
layer_name = layer_prefix + '/output_dense'
297297
output_layer = tf_keras.layers.EinsumDense(
298298
'abc,cd->abd',

official/nlp/modeling/layers/mobile_bert_layers_test.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,15 @@ def test_embedding_layer_with_token_type(self):
4242
output = layer(input_seq, token_type)
4343
output_shape = output.shape.as_list()
4444
expected_shape = [1, 4, 16]
45-
self.assertListEqual(output_shape, expected_shape, msg=None)
45+
self.assertListEqual(output_shape, expected_shape)
4646

4747
def test_embedding_layer_without_token_type(self):
4848
layer = mobile_bert_layers.MobileBertEmbedding(10, 8, 2, 16)
4949
input_seq = tf.Variable([[2, 3, 4, 5]])
5050
output = layer(input_seq)
5151
output_shape = output.shape.as_list()
5252
expected_shape = [1, 4, 16]
53-
self.assertListEqual(output_shape, expected_shape, msg=None)
53+
self.assertListEqual(output_shape, expected_shape)
5454

5555
def test_embedding_layer_get_config(self):
5656
layer = mobile_bert_layers.MobileBertEmbedding(
@@ -72,7 +72,7 @@ def test_no_norm(self):
7272
output = layer(feature)
7373
output_shape = output.shape.as_list()
7474
expected_shape = [2, 3, 4]
75-
self.assertListEqual(output_shape, expected_shape, msg=None)
75+
self.assertListEqual(output_shape, expected_shape)
7676

7777
@parameterized.named_parameters(('with_kq_shared_bottleneck', False),
7878
('without_kq_shared_bottleneck', True))
@@ -83,7 +83,17 @@ def test_transfomer_kq_shared_bottleneck(self, is_kq_shared):
8383
output = layer(feature)
8484
output_shape = output.shape.as_list()
8585
expected_shape = [2, 3, 512]
86-
self.assertListEqual(output_shape, expected_shape, msg=None)
86+
self.assertListEqual(output_shape, expected_shape)
87+
88+
def test_transformer_with_squared_relu(self):
89+
feature = tf.random.uniform([2, 3, 512])
90+
layer = mobile_bert_layers.MobileBertTransformer(
91+
intermediate_act_fn='squared_relu'
92+
)
93+
output = layer(feature)
94+
output_shape = output.shape.as_list()
95+
expected_shape = [2, 3, 512]
96+
self.assertListEqual(output_shape, expected_shape)
8797

8898
def test_transfomer_with_mask(self):
8999
feature = tf.random.uniform([2, 3, 512])
@@ -94,7 +104,7 @@ def test_transfomer_with_mask(self):
94104
output = layer(feature, input_mask)
95105
output_shape = output.shape.as_list()
96106
expected_shape = [2, 3, 512]
97-
self.assertListEqual(output_shape, expected_shape, msg=None)
107+
self.assertListEqual(output_shape, expected_shape)
98108

99109
def test_transfomer_return_attention_score(self):
100110
sequence_length = 5
@@ -104,8 +114,7 @@ def test_transfomer_return_attention_score(self):
104114
num_attention_heads=num_attention_heads)
105115
_, attention_score = layer(feature, return_attention_scores=True)
106116
expected_shape = [2, num_attention_heads, sequence_length, sequence_length]
107-
self.assertListEqual(
108-
attention_score.shape.as_list(), expected_shape, msg=None)
117+
self.assertListEqual(attention_score.shape.as_list(), expected_shape)
109118

110119
def test_transformer_get_config(self):
111120
layer = mobile_bert_layers.MobileBertTransformer(

0 commit comments

Comments
 (0)