diff --git a/src/cerebras/modelzoo/data/nlp/t5/t5_utils.py b/src/cerebras/modelzoo/data/nlp/t5/t5_utils.py index 1e458301..ea78dfbc 100644 --- a/src/cerebras/modelzoo/data/nlp/t5/t5_utils.py +++ b/src/cerebras/modelzoo/data/nlp/t5/t5_utils.py @@ -318,7 +318,7 @@ def split_sequences(tokens, length): :param int length: The maximum allowed length of a sample. :returns: A list of sequences containing exactly the same samples as before - split into seperate samples such that no element of the dataset has + split into separate samples such that no element of the dataset has length longer than specified. """ n_tokens = len(tokens) diff --git a/src/cerebras/modelzoo/data_preparation/nlp/t5/utils.py b/src/cerebras/modelzoo/data_preparation/nlp/t5/utils.py index 1e458301..ea78dfbc 100644 --- a/src/cerebras/modelzoo/data_preparation/nlp/t5/utils.py +++ b/src/cerebras/modelzoo/data_preparation/nlp/t5/utils.py @@ -318,7 +318,7 @@ def split_sequences(tokens, length): :param int length: The maximum allowed length of a sample. :returns: A list of sequences containing exactly the same samples as before - split into seperate samples such that no element of the dataset has + split into separate samples such that no element of the dataset has length longer than specified. """ n_tokens = len(tokens)