Skip to content

Commit c856af5

Browse files
author
jetstream authors
committed
Merge pull request #250 from AI-Hypercomputer:vij_moe_runner
PiperOrigin-RevId: 748827578
2 parents 7ca7c58 + 4058510 commit c856af5

File tree

3 files changed

+70
-52
lines changed

3 files changed

+70
-52
lines changed

.github/workflows/run_maxtext_jetstream_tests.yaml

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,15 @@ on:
2424
workflow_dispatch:
2525
schedule:
2626
# Run the job every 24 hours
27-
- cron: '0 */1 * * *'
27+
- cron: '0 */24 * * *'
2828

2929
jobs:
3030
prelim:
3131
runs-on: ["self-hosted", "tpu", "v6e-8"]
3232
steps:
3333
- uses: actions/checkout@v4
34-
- name: Test MOE Microbenchmarks
35-
run: bash .github/workflows/test_moe_microbenchmarks.sh
34+
- name: Test MOE Benchmarks
35+
run: bash .github/workflows/test_moe_benchmarks.sh
3636
# run: bash .github/workflows/test_moe_8x22b_microbenchmark.sh
3737
# - name: Test MOE long context chunked prefill - 8k
3838
# run: bash .github/workflows/benchmark_chunked_prefill.sh
@@ -50,19 +50,18 @@ jobs:
5050
- name: Log message if dependent job succeeded
5151
if: ${{ ! (failure() && github.event.pull_request == null) }}
5252
run: echo "Conditions for creating/updating issue not met. Skipping."
53-
# - name: Send email
54-
# uses: dawidd6/action-send-mail@v3
55-
# with:
56-
# server_address: smtp.gmail.com
57-
# server_port: 465
58-
# username: ${{secrets.MAIL_USERNAME}}
59-
# password: ${{secrets.MAIL_PASSWORD}}
60-
# subject: Message from Inference Stable Stack Runs.
61-
62-
# from: InferenceStableStackRuns
63-
# secure: true
64-
# attachments: ~/test_dir/moe_8x7b_jetstream.txt
65-
# # attachments: ~/test_dir/moe_8x7b.txt,~/test_dir/moe_8x22b.txt,~/test_dir/moe_8x22b_long_context_8k_prefill.txt
66-
# body: workflow for ${{github.repository}} completed successfully!
53+
- name: Send email
54+
uses: dawidd6/action-send-mail@v3
55+
with:
56+
server_address: smtp.gmail.com
57+
server_port: 465
58+
username: ${{secrets.MAIL_USERNAME}}
59+
password: ${{secrets.MAIL_PASSWORD}}
60+
subject: Message from Inference Stable Stack Runs.
61+
62+
from: JetStream Runs
63+
secure: true
64+
attachments: ~/test_dir/moe_8x7b.txt,~/test_dir/moe_8x22b.txt,~/test_dir/moe_8x22b_long_context_8k_prefill.txt,~/test_dir/moe_8x7b_jetstream.txt
65+
body: workflow for ${{github.repository}} completed successfully!
6766
- name: Cleanup
6867
run: rm -rf ~/test_dir
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/bin/bash
2+
mkdir ~/test_dir
3+
cd ~/test_dir
4+
git clone https://github.com/google/maxtext.git
5+
6+
cd ~/test_dir
7+
git clone https://github.com/google/JetStream.git
8+
cd ~/test_dir
9+
sudo apt-get -y update
10+
sudo apt-get -y install python3.10-venv
11+
sudo apt-get -y install jq
12+
python -m venv .env
13+
source .env/bin/activate
14+
15+
cd ~/test_dir
16+
cd JetStream
17+
pip install -e .
18+
cd benchmarks
19+
pip install -r requirements.in
20+
21+
cd ~/test_dir
22+
cd maxtext/
23+
pip3 install wheel
24+
bash setup.sh MODE=stable DEVICE=tpu
25+
26+
pip install nltk==3.8.1
27+
28+
29+
# moe 8x7b microbenchmark
30+
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml tokenizer_path=assets/tokenizer.mistral-v1 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x7b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=8 megablox=False quantization=int8 quantize_kvcache=False checkpoint_is_quantized=True load_parameters_path=gs://jetstream-runner/8-7B-int8 capacity_factor=1 attention=dot_product model_call_mode=inference sparse_matmul=False weight_dtype=bfloat16 > ~/test_dir/moe_8x7b.txt
31+
tail -n5 ~/test_dir/moe_8x7b.txt > ~/test_dir/moe_8x7b.tmp && mv ~/test_dir/moe_8x7b.tmp ~/test_dir/moe_8x7b.txt
32+
33+
# moe 8x22B microbenchmark
34+
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=True capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="128,1024" sparse_matmul=False model_call_mode=inference > ~/test_dir/moe_8x22b.txt
35+
tail -n5 ~/test_dir/moe_8x22b.txt > ~/test_dir/moe_8x22b.tmp && mv ~/test_dir/moe_8x22b.tmp ~/test_dir/moe_8x22b.txt
36+
37+
# moe 8x22B 8k context length chunked prefill with 2k prefill chunk size
38+
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.benchmark_chunked_prefill MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=8192 max_target_length=9000 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=False capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="8192" sparse_matmul=False model_call_mode=inference ici_context_autoregressive_parallelism=8 use_chunked_prefill=True prefill_chunk_size=2048 > ~/test_dir/moe_8x22b_long_context_8k_prefill.txt
39+
tail -n5 ~/test_dir/moe_8x22b_long_context_8k_prefill.txt > ~/test_dir/moe_8x22b_long_context_8k_prefill.tmp && mv ~/test_dir/moe_8x22b_long_context_8k_prefill.tmp ~/test_dir/moe_8x22b_long_context_8k_prefill.txt
40+
41+
42+
# moe 8x7B Maxtext Jetstream
43+
44+
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.maxengine_server MaxText/configs/inference.yml tokenizer_path=assets/tokenizer.mistral-v1 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x7b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=24 megablox=False quantization=int8 quantize_kvcache=True checkpoint_is_quantized=True load_parameters_path=gs://jetstream-runner/8-7B-int8 capacity_factor=1 attention=dot_product model_call_mode=inference sparse_matmul=False weight_dtype=bfloat16
45+
46+
sleep 600
47+
48+
cd ..
49+
50+
python JetStream/benchmarks/benchmark_serving.py --tokenizer ~/test_dir/maxtext/assets/tokenizer.mistral-v1 --save-result --save-request-outputs --request-outputs-file-path outputs.json --num-prompts 1200 --max-output-length 1024 --dataset openorca --run-eval True > ~/test_dir/moe_8x7b_jetstream.txt
51+
tail -n10 ~/test_dir/moe_8x7b_jetstream.txt > ~/test_dir/moe_8x7b_jetstream.tmp && mv ~/test_dir/moe_8x7b_jetstream.tmp ~/test_dir/moe_8x7b_jetstream.txt
52+
53+
# kill python jobs
54+
sudo kill -9 $(ps aux | grep python | awk '{print $2}')

.github/workflows/test_moe_microbenchmarks.sh

Lines changed: 0 additions & 35 deletions
This file was deleted.

0 commit comments

Comments
 (0)