Skip to content

Commit 6406b9f

Browse files
zhangtemplarfacebook-github-bot
authored andcommitted
add smollm to torchtune (#2887)
Summary: add SmolLM2 family to torchtune. SmolLM2 is a family of compact language models available in three sizes: 135M, 360M, and 1.7B parameters. Their architecture is the same as LLaMA 3. Reviewed By: byzhang Differential Revision: D78495904 Privacy Context Container: L1305358
1 parent 10c31c0 commit 6406b9f

File tree

3 files changed

+98
-0
lines changed

3 files changed

+98
-0
lines changed

torchtune/models/smol/__init__.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from ._component_builders import smollm2

from ._model_builders import (
    smollm2_135m,
    smollm2_360m,
    smollm2_1_7b,
)

# Public API of the SmolLM2 model family: the generic component builder
# plus one pre-configured builder per released model size.
__all__ = [
    "smollm2",
    "smollm2_135m",
    "smollm2_360m",
    "smollm2_1_7b",
]
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
from torchtune.modules import (
7+
TransformerDecoder,
8+
)
9+
from torchtune.models.llama3_2._component_builders import llama3_2
10+
11+
"""
12+
Component builders for SmolLM 2. It is based on LLaMA architecture.
13+
14+
https://huggingface.co/HuggingFaceTB/SmolLM2-135M/
15+
16+
SmolLM2 is a family of compact language models available in three sizes: 135M, 360M,
17+
and 1.7B parameters. They are capable of solving a wide range of tasks while being
18+
lightweight enough to run on-device. More details in our paper: https://arxiv.org/abs/2502.02737v1
19+
"""
20+
21+
22+
def smollm2(
    num_layers: int,
    num_heads: int,
    num_kv_heads: int,
    embed_dim: int,
    intermediate_dim: int,
    max_seq_len: int = 8192,
    vocab_size: int = 49152,
    attn_dropout: float = 0.0,
    rope_base: int = 100000,
    norm_eps: float = 1e-5,
    scale_factor: int = 32,
    tie_word_embeddings: bool = True,
) -> TransformerDecoder:
    """
    Build a SmolLM2 model.

    SmolLM2 shares the Llama 3 architecture, so this builder simply forwards
    every argument to the ``llama3_2`` component builder.

    Args:
        num_layers (int): number of transformer decoder layers.
        num_heads (int): number of query attention heads.
        num_kv_heads (int): number of key/value heads; grouped-query attention
            is used when this is smaller than ``num_heads``.
        embed_dim (int): embedding dimension of the model.
        intermediate_dim (int): hidden dimension of the feed-forward layers.
        max_seq_len (int): maximum sequence length. Default: 8192.
        vocab_size (int): size of the tokenizer vocabulary. Default: 49152.
        attn_dropout (float): dropout probability applied to attention.
            Default: 0.0.
        rope_base (int): base frequency for rotary positional embeddings.
            Default: 100000.
        norm_eps (float): epsilon used in normalization layers. Default: 1e-5.
        scale_factor (int): RoPE scaling factor forwarded to ``llama3_2``.
            Default: 32.
        tie_word_embeddings (bool): whether the output projection shares
            weights with the token embedding. Default: True.

    Returns:
        TransformerDecoder: the instantiated SmolLM2 model.
    """
    return llama3_2(
        vocab_size=vocab_size,
        num_layers=num_layers,
        num_heads=num_heads,
        num_kv_heads=num_kv_heads,
        embed_dim=embed_dim,
        max_seq_len=max_seq_len,
        attn_dropout=attn_dropout,
        rope_base=rope_base,
        intermediate_dim=intermediate_dim,
        norm_eps=norm_eps,
        scale_factor=scale_factor,
        tie_word_embeddings=tie_word_embeddings,
    )
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from torchtune.models.smol._component_builders import smollm2
8+
9+
from torchtune.modules import TransformerDecoder
10+
11+
"""
12+
https://huggingface.co/HuggingFaceTB/SmolLM2-135M/
13+
14+
SmolLM2 is a family of compact language models available in three sizes: 135M, 360M,
15+
and 1.7B parameters. They are capable of solving a wide range of tasks while being
16+
lightweight enough to run on-device. More details in our paper: https://arxiv.org/abs/2502.02737v1
17+
"""
18+
19+
20+
def smollm2_135m() -> TransformerDecoder:
    """Builder for the SmolLM2 135M model.

    Returns:
        TransformerDecoder: SmolLM2 135M configuration.
    """
    return smollm2(
        num_layers=30,
        num_heads=9,
        num_kv_heads=3,
        embed_dim=576,
        intermediate_dim=1536,
    )
22+
23+
24+
def smollm2_360m() -> TransformerDecoder:
    """Builder for the SmolLM2 360M model.

    Returns:
        TransformerDecoder: SmolLM2 360M configuration.
    """
    return smollm2(
        num_layers=32,
        num_heads=15,
        num_kv_heads=5,
        embed_dim=960,
        intermediate_dim=2560,
    )
26+
27+
28+
def smollm2_1_7b() -> TransformerDecoder:
    """Builder for the SmolLM2 1.7B model.

    Returns:
        TransformerDecoder: SmolLM2 1.7B configuration.
    """
    return smollm2(
        num_layers=24,
        num_heads=32,
        num_kv_heads=32,
        embed_dim=2048,
        intermediate_dim=8192,
        rope_base=130000,
    )

0 commit comments

Comments
 (0)