
Commit 3aa65c6

Arjunbalaaws authored and joshir committed
Merge pull request #24 from aws-neuron/release_cut_2.21
Release 2.21
1 parent 083d3ea commit 3aa65c6

192 files changed: 13756 insertions(+), 4644 deletions(-)


Diff for: .gitignore (+9)

@@ -1,5 +1,8 @@
 # Python .gitignore template
 
+*.deb
+*.pt
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -78,6 +81,7 @@ target/
 
 # Jupyter Notebook
 .ipynb_checkpoints
+*.ipynb
 
 # IPython
 profile_default/
@@ -140,3 +144,8 @@ src/neuronx_distributed.egg-info/
 *.whl
 **/.DS_Store
 __pycache__
+.vscode
+/exp*
+/tmp*
+tmp.*
+pyproject.toml

Diff for: .pre-commit-config.yaml (+8, -1)

@@ -14,7 +14,7 @@ repos:
       - id: clang-format
         args: [--style=file, -i]
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.5.0
+    rev: v0.6.2
     hooks:
       - id: ruff
         name: ruff
@@ -23,3 +23,10 @@ repos:
         types: [python]
         language: system
         exclude: cases_update
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.11.2
+    hooks:
+      - id: mypy
+        name: mypy
+        language: python
+        files: src/.*\.py
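
With this hook added, the mypy check can be run locally through the standard pre-commit CLI (for example, `pre-commit run mypy --all-files`); the `files: src/.*\.py` pattern restricts it to Python sources under `src/`.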

Diff for: README.md (+1, -1)

@@ -11,7 +11,7 @@ To build from source, run the following command:
 ```
 bash ./build.sh
 ```
-
+
 It should place the wheel at `build/`
 
 ## API Reference Guide

Diff for: build.sh (+2, -2)

@@ -15,9 +15,9 @@ fi
 # Run static code analysis
 python3.8 -m pip install mypy
 # Install type bindings
-python3.8 -m pip install types-requests boto3-stubs[s3]
+python3.8 -m pip install types-requests boto3-stubs[s3] types-PyYAML
 # removing cache fails in ToD
-python3.8 -m mypy --no-incremental || true
+python3.8 -m mypy --no-incremental --cache-dir=/dev/null
 # exit when asked to run `mypy` only
 if [[ "$1" == "mypy" ]]
 then

Diff for: examples/inference/dbrx/dbrx_runner.py (+11, -19)

@@ -5,54 +5,46 @@
     NeuronDbrxModel,
 )
 from runner import InferenceRunner
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer, DbrxConfig
 
 from neuronx_distributed.parallel_layers.checkpointing import _invoke_preshard_hook
 
 
 class DbrxRunner(InferenceRunner):
     def load_hf_model(self):
-        config = NeuronDbrxConfig.from_pretrained(self.model_path)
-        return NeuronDbrxForCausalLM.load_hf_model(self.model_path, config)
+        hf_config = DbrxConfig.from_pretrained(self.model_path)
+        return NeuronDbrxForCausalLM.load_hf_model(self.model_path, hf_config)
 
     def load_neuron_model_on_cpu(self, max_prompt_length, sequence_length, batch_size, **kwargs):
         # On CPU we can only run tensor parallelism with degree 1
-        config = self.get_config_for_nxd(
+        hf_config = self.get_hf_config(sequence_length=sequence_length, **kwargs)
+        neuron_config = self.get_config_for_nxd(
+            hf_config,
             batch_size,
             1,
             max_prompt_length=max_prompt_length,
             sequence_length=sequence_length,
             enable_bucketing=False,
             **kwargs)
-        config.torch_dtype = torch.float32
+        hf_config.torch_dtype = torch.float32
 
         self.init_ditributed_env()
-        neuron_model = NeuronDbrxModel(config)
+        neuron_model = NeuronDbrxModel(neuron_config)
 
-        state_dict = NeuronDbrxForCausalLM.get_state_dict(self.model_path, config)
+        state_dict = NeuronDbrxForCausalLM.get_state_dict(self.model_path, neuron_config)
 
         _invoke_preshard_hook(neuron_model, state_dict)
 
         neuron_model.load_state_dict(state_dict, strict=False)
 
-        if config.torch_dtype == torch.bfloat16:
+        if hf_config.torch_dtype == torch.bfloat16:
             neuron_model.bfloat16()
 
-        model = NeuronDbrxForCausalLM(None, config)
+        model = NeuronDbrxForCausalLM(None, neuron_config)
         model.context_encoding_model.model = neuron_model
         model.token_generation_model.model = neuron_model
         return model
 
-    def load_neuron_model(self, traced_model_path):
-        config = NeuronDbrxConfig.from_pretrained(traced_model_path)
-        model = NeuronDbrxForCausalLM.from_pretrained("", config)
-
-        model.load(traced_model_path)
-        if config.torch_dtype == torch.bfloat16:
-            model.bfloat16()
-
-        return model
-
     def load_tokenizer(self, padding_side=None):
         tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_path)
         tokenizer.pad_token = tokenizer.unk_token
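
For context on the refactor above: the runner now keeps the Hugging Face model configuration (transformers' `DbrxConfig`) separate from the Neuron configuration built by `get_config_for_nxd`. A minimal sketch of that split follows; the checkpoint path is a placeholder and the commented call merely mirrors `load_neuron_model_on_cpu` above, so this is illustrative rather than a runnable end-to-end example.

    # Sketch of the hf_config / neuron_config split (placeholder checkpoint path).
    import torch
    from transformers import DbrxConfig

    model_path = "/path/to/dbrx-checkpoint"  # hypothetical local checkpoint directory

    # The Hugging Face config now comes straight from transformers' DbrxConfig
    # (previously NeuronDbrxConfig served both roles).
    hf_config = DbrxConfig.from_pretrained(model_path)
    hf_config.torch_dtype = torch.float32  # the CPU path forces fp32, as in the diff

    # The Neuron-specific config is then derived from it inside the runner,
    # mirroring load_neuron_model_on_cpu (requires the example repo to run):
    # neuron_config = self.get_config_for_nxd(
    #     hf_config,
    #     batch_size,
    #     1,
    #     max_prompt_length=max_prompt_length,
    #     sequence_length=sequence_length,
    #     enable_bucketing=False,
    #     **kwargs)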
