Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions deepspeed/runtime/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,14 +244,14 @@
#########################################
# Gradient clipping
#########################################
# Gradient clipping. By default, this feature is enabled with a value of 1.0.
# Users can configure in ds_config.json as below example (set to 0.0 to disable):
GRADIENT_CLIPPING_FORMAT = '''
Gradient clipping should be enabled as:
"gradient_clipping": 1.0
'''
# Name of the config key that carries the user-supplied clipping value.
GRADIENT_CLIPPING = 'gradient_clipping'
# Value used when the config does not set the key above. This is the single
# effective definition; an earlier `= 0.` assignment was dead code because it
# was overwritten on the very next line.
GRADIENT_CLIPPING_DEFAULT = 1.0
Comment thread
sfc-gh-truwase marked this conversation as resolved.

#########################################
# Capture graph for short kernels sequences
Expand Down
63 changes: 63 additions & 0 deletions tests/unit/v1/half_precision/test_gradient_clipping_default.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright (c) DeepSpeed Team.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

import types

import deepspeed
from deepspeed.runtime.engine import DeepSpeedEngine
from deepspeed.runtime.config import get_gradient_clipping
from deepspeed.runtime.constants import GRADIENT_CLIPPING_DEFAULT
from unit.common import DistributedTest
from unit.simple_model import SimpleModel
import pytest


class TestGradientClippingConfig:
    """Config-level checks for ``gradient_clipping`` that need no distributed setup."""

    def test_default_is_one(self):
        """An empty config dict resolves to the default constant, and that constant is 1.0."""
        resolved = get_gradient_clipping({})
        assert resolved == GRADIENT_CLIPPING_DEFAULT
        assert GRADIENT_CLIPPING_DEFAULT == 1.0

    @pytest.mark.parametrize("gradient_clipping", [0.5, 0.0])
    def test_explicit_value_is_used(self, gradient_clipping):
        """A value given in the config dict is returned as-is, including an explicit 0.0."""
        user_config = {"gradient_clipping": gradient_clipping}
        assert get_gradient_clipping(user_config) == gradient_clipping

    @pytest.mark.parametrize("gradient_clipping", [0.5, 0.0])
    def test_engine_getter_returns_config_value(self, gradient_clipping):
        """The engine getter, called unbound on a stub, reports the stubbed config value."""
        # The stub exposes only `_config.gradient_clipping`, so no real engine is built.
        stub_config = types.SimpleNamespace(gradient_clipping=gradient_clipping)
        stub_engine = types.SimpleNamespace(_config=stub_config)
        assert DeepSpeedEngine.gradient_clipping(stub_engine) == gradient_clipping


class TestGradientClippingEndToEnd(DistributedTest):
    """Checks the clipping value reported by an engine built via ``deepspeed.initialize``."""
    world_size = 1

    def _config(self, gradient_clipping=None):
        """Build a minimal config dict; ``gradient_clipping`` is added only when not None."""
        optimizer_section = {
            "type": "Adam",
            "params": {
                "lr": 1e-3,
                "torch_adam": True
            },
        }
        ds_config = {"train_batch_size": 1, "optimizer": optimizer_section}
        if gradient_clipping is None:
            # Leave the key out entirely so the default is what gets exercised.
            return ds_config
        ds_config["gradient_clipping"] = gradient_clipping
        return ds_config

    def _init(self, gradient_clipping=None):
        """Initialize DeepSpeed around a small ``SimpleModel`` and return only the engine."""
        net = SimpleModel(hidden_dim=8)
        ds_config = self._config(gradient_clipping)
        # Only the first of the four values returned by initialize() is needed here.
        ds_engine, _, _, _ = deepspeed.initialize(config=ds_config, model=net, model_parameters=net.parameters())
        return ds_engine

    def test_init_without_gradient_clipping_defaults_to_one(self):
        """With the key omitted from the config, the engine reports 1.0."""
        assert self._init().gradient_clipping() == 1.0

    def test_explicit_zero_disables_clipping(self):
        """An explicit 0.0 in the config is reported back as 0.0 by the engine."""
        assert self._init(gradient_clipping=0.0).gradient_clipping() == 0.0
Loading