From 48bcd4893f65f63fc723eb457665d8794ca33958 Mon Sep 17 00:00:00 2001 From: Eli Davidson Date: Fri, 24 Jan 2025 17:28:00 +0000 Subject: [PATCH 1/8] Update ['dlc_developer_config.toml'] dlc_developer_config.toml: { 'build': { 'build_frameworks': ['huggingface_pytorch'], 'build_inference': True, 'build_training': False}, 'buildspec_override': { 'dlc-pr-huggingface-pytorch-neuronx-inference': 'huggingface/pytorch/inference/buildspec-neuronx.yml'}, 'dev': { 'arm64_mode': False, 'deep_canary_mode': False, 'graviton_mode': False, 'neuronx_mode': True}, 'test': { 'ec2_tests': True, 'ecs_tests': True, 'eks_tests': True, 'sagemaker_local_tests': True, 'sagemaker_remote_tests': True, 'sanity_tests': True, 'security_tests': True}} --- dlc_developer_config.toml | 106 ++++---------------------------------- 1 file changed, 9 insertions(+), 97 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 440ddf1168c5..f8feaab037ee 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -1,162 +1,74 @@ [dev] -# Set to "huggingface", for example, if you are a huggingface developer. 
Default is "" partner_developer = "" -# Please only set it to true if you are preparing an EI related PR -# Do remember to revert it back to false before merging any PR (including EI dedicated PR) ei_mode = false -# Please only set it to true if you are preparing a NEURON related PR -# Do remember to revert it back to false before merging any PR (including NEURON dedicated PR) neuron_mode = false -# Please only set it to true if you are preparing a NEURONX related PR -# Do remember to revert it back to false before merging any PR (including NEURONX dedicated PR) -neuronx_mode = false -# Please only set it to true if you are preparing a GRAVITON related PR -# Do remember to revert it back to false before merging any PR (including GRAVITON dedicated PR) +neuronx_mode = true graviton_mode = false -# Please only set it to true if you are preparing a ARM64 related PR -# Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR) arm64_mode = false -# Please only set it to True if you are preparing a HABANA related PR -# Do remember to revert it back to False before merging any PR (including HABANA dedicated PR) habana_mode = false -# Please only set it to True if you are preparing a HUGGINGFACE TRCOMP related PR -# Do remember to revert it back to False before merging any PR (including HUGGINGFACE TRCOMP dedicated PR) -# This mode is used to build TF 2.6 and PT1.11 DLC huggingface_trcomp_mode = false -# Please only set it to True if you are preparing a TRCOMP related PR -# Do remember to revert it back to False before merging any PR (including TRCOMP dedicated PR) -# This mode is used to build PT1.12 and above DLC trcomp_mode = false -# Set deep_canary_mode to true to simulate Deep Canary Test conditions on PR for all frameworks in the -# build_frameworks list below. This will cause all image builds and non-deep-canary tests on the PR to be skipped, -# regardless of whether they are enabled or disabled below. 
-# Set graviton_mode/arm64_mode to true to run Deep Canaries on Graviton/ARM64 images. -# Do remember to revert it back to false before merging any PR. deep_canary_mode = false [build] -# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. -# available frameworks - ["autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = [] - - -# By default we build both training and inference containers. Set true/false values to determine which to build. -build_training = true +build_frameworks = [ "huggingface_pytorch",] +build_training = false build_inference = true - -# Set do_build to "false" to skip builds and test the latest image built by this PR -# Note: at least one build is required to set do_build to "false" do_build = true [notify] -### Notify on test failures -### Off by default notify_test_failures = false - # Valid values: medium or high - notification_severity = "medium" +notification_severity = "medium" [test] -### On by default sanity_tests = true security_tests = true - safety_check_test = false - ecr_scan_allowlist_feature = false +safety_check_test = false +ecr_scan_allowlist_feature = false ecs_tests = true eks_tests = true ec2_tests = true -# Set it to true if you are preparing a Benchmark related PR ec2_benchmark_tests = false - -### Set ec2_tests_on_heavy_instances = true to be able to run any EC2 tests that use large/expensive instance types by -### default. If false, these types of tests will be skipped while other tests will run as usual. -### These tests are run in EC2 test jobs, so ec2_tests must be true if ec2_tests_on_heavy_instances is true. 
-### Off by default (set to false) ec2_tests_on_heavy_instances = false - -### SM specific tests -### On by default sagemaker_local_tests = true - -# run standard sagemaker remote tests from test/sagemaker_tests sagemaker_remote_tests = true -# run efa sagemaker tests sagemaker_efa_tests = false -# run release_candidate_integration tests sagemaker_rc_tests = false -# run sagemaker benchmark tests sagemaker_benchmark_tests = false - -# SM remote EFA test instance type sagemaker_remote_efa_instance_type = "" - -# Run CI tests for nightly images -# false by default nightly_pr_test_mode = false - use_scheduler = false [buildspec_override] -# Assign the path to the required buildspec file from the deep-learning-containers folder -# For example: -# dlc-pr-tensorflow-2-habana-training = "habana/tensorflow/training/buildspec-2-10.yml" -# dlc-pr-pytorch-inference = "pytorch/inference/buildspec-1-12.yml" -# Setting the buildspec file path to "" allows the image builder to choose the default buildspec file. 
- -### TRAINING PR JOBS ### - -# Standard Framework Training dlc-pr-pytorch-training = "" dlc-pr-tensorflow-2-training = "" dlc-pr-autogluon-training = "" - -# HuggingFace Training dlc-pr-huggingface-tensorflow-training = "" dlc-pr-huggingface-pytorch-training = "" - -# Training Compiler dlc-pr-huggingface-pytorch-trcomp-training = "" dlc-pr-huggingface-tensorflow-2-trcomp-training = "" dlc-pr-pytorch-trcomp-training = "" - -# Neuron Training dlc-pr-pytorch-neuron-training = "" dlc-pr-tensorflow-2-neuron-training = "" - -# Stability AI Training dlc-pr-stabilityai-pytorch-training = "" - -# Habana Training dlc-pr-pytorch-habana-training = "" dlc-pr-tensorflow-2-habana-training = "" - -### INFERENCE PR JOBS ### - -# Standard Framework Inference dlc-pr-pytorch-inference = "" dlc-pr-tensorflow-2-inference = "" dlc-pr-autogluon-inference = "" - -# Neuron Inference dlc-pr-pytorch-neuron-inference = "" dlc-pr-tensorflow-1-neuron-inference = "" dlc-pr-tensorflow-2-neuron-inference = "" - -# HuggingFace Inference dlc-pr-huggingface-tensorflow-inference = "" dlc-pr-huggingface-pytorch-inference = "" dlc-pr-huggingface-pytorch-neuron-inference = "" - -# Stability AI Inference dlc-pr-stabilityai-pytorch-inference = "" - -# Graviton Inference dlc-pr-pytorch-graviton-inference = "" dlc-pr-tensorflow-2-graviton-inference = "" - -# ARM64 Inference dlc-pr-pytorch-arm64-inference = "" dlc-pr-tensorflow-2-arm64-inference = "" - -# EIA Inference dlc-pr-pytorch-eia-inference = "" dlc-pr-tensorflow-2-eia-inference = "" +# WARNING: Unrecognized key generated below +dlc-pr-huggingface-pytorch-neuronx-inference = "huggingface/pytorch/inference/buildspec-neuronx.yml" + From f26c85a567db18b6b346ee58841b835f4212fd9e Mon Sep 17 00:00:00 2001 From: Eli Davidson Date: Fri, 24 Jan 2025 17:32:30 +0000 Subject: [PATCH 2/8] Update ['dlc_developer_config.toml'] dlc_developer_config.toml: { 'build': { 'build_frameworks': ['huggingface_pytorch'], 'build_inference': True, 'build_training': False}, 
'buildspec_override': { 'dlc-pr-huggingface-pytorch-neuronx-inference': 'huggingface/pytorch/inference/buildspec-neuronx.yml'}, 'dev': { 'arm64_mode': False, 'deep_canary_mode': False, 'graviton_mode': False, 'neuronx_mode': True}, 'test': { 'ec2_tests': True, 'ecs_tests': True, 'eks_tests': True, 'sagemaker_local_tests': True, 'sagemaker_remote_tests': True, 'sanity_tests': True, 'security_tests': True}} From 6737023a3563873d1436fdf77fcad9d1bf3a00e8 Mon Sep 17 00:00:00 2001 From: Eli Davidson Date: Fri, 24 Jan 2025 17:33:46 +0000 Subject: [PATCH 3/8] bump ubuntu for neuronx to 22 --- huggingface/pytorch/inference/buildspec-neuronx.yml | 2 +- .../inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/huggingface/pytorch/inference/buildspec-neuronx.yml b/huggingface/pytorch/inference/buildspec-neuronx.yml index 21e479f145b3..92a44fecc255 100644 --- a/huggingface/pytorch/inference/buildspec-neuronx.yml +++ b/huggingface/pytorch/inference/buildspec-neuronx.yml @@ -35,7 +35,7 @@ images: python_version: &DOCKER_PYTHON_VERSION py3 tag_python_version: &TAG_PYTHON_VERSION py310 neuron_sdk_version: &NEURON_SDK_VERSION sdk2.20.0 - os_version: &OS_VERSION ubuntu20.04 + os_version: &OS_VERSION ubuntu22.04 transformers_version: &TRANSFORMERS_VERSION 4.43.2 tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION,"-", *NEURON_SDK_VERSION, '-', *OS_VERSION ] docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *NEURON_SDK_VERSION, /Dockerfile., *DEVICE_TYPE ] diff --git a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx index 6949d17c5522..3affa0c29bc5 100644 --- a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx +++ b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx @@ -1,4 +1,4 @@ -FROM
ubuntu:20.04 +FROM ubuntu:22.04 LABEL dlc_major_version="1" LABEL maintainer="Amazon AI" From 46fd19833c82ba117461e547289f6433f49003e8 Mon Sep 17 00:00:00 2001 From: Eli Davidson Date: Fri, 24 Jan 2025 18:13:29 +0000 Subject: [PATCH 4/8] gpg agent issue --- .../inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx index 3affa0c29bc5..3bd36a7d3045 100644 --- a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx +++ b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx @@ -38,8 +38,11 @@ RUN apt-get update \ && apt-get upgrade -y \ && apt-get install -y --no-install-recommends software-properties-common \ && add-apt-repository ppa:openjdk-r/ppa \ - && apt-get update \ - && apt-get install -y --no-install-recommends \ + && apt-get update + +RUN apt-get install -y --no-install-recommends gpg-agent + +RUN apt-get install -y --no-install-recommends \ build-essential \ apt-transport-https \ ca-certificates \ @@ -59,7 +62,6 @@ RUN apt-get update \ unzip \ zlib1g-dev \ libcap-dev \ - gpg-agent \ libexpat1 \ libxml2 \ libgstreamer1.0-0 \ From 2cac4ca2368881686e638ab9805e1d7b4009795c Mon Sep 17 00:00:00 2001 From: Eli Davidson Date: Fri, 24 Jan 2025 18:20:05 +0000 Subject: [PATCH 5/8] separate commands --- .../docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx index 3bd36a7d3045..9646224a5976 100644 --- a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx +++ b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx @@ -34,11 +34,11 @@ ENV PATH 
/opt/conda/bin:/opt/aws/neuron/bin:$PATH ENV SAGEMAKER_SERVING_MODULE sagemaker_pytorch_serving_container.serving:main ENV TEMP=/home/model-server/tmp -RUN apt-get update \ - && apt-get upgrade -y \ - && apt-get install -y --no-install-recommends software-properties-common \ - && add-apt-repository ppa:openjdk-r/ppa \ - && apt-get update +RUN apt-get update +RUN apt-get upgrade -y +RUN apt-get install -y --no-install-recommends software-properties-common +RUN add-apt-repository ppa:openjdk-r/ppa +RUN apt-get update RUN apt-get install -y --no-install-recommends gpg-agent From 5ba3b7604b20ca0fb947a43d40ffc56efd6b1cda Mon Sep 17 00:00:00 2001 From: Eli Davidson Date: Fri, 24 Jan 2025 18:22:31 +0000 Subject: [PATCH 6/8] install gpg-agent sooner --- .../inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx index 9646224a5976..6de0bff41f3b 100644 --- a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx +++ b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx @@ -37,10 +37,10 @@ ENV TEMP=/home/model-server/tmp RUN apt-get update RUN apt-get upgrade -y RUN apt-get install -y --no-install-recommends software-properties-common +RUN apt-get install -y --no-install-recommends gpg-agent RUN add-apt-repository ppa:openjdk-r/ppa RUN apt-get update -RUN apt-get install -y --no-install-recommends gpg-agent RUN apt-get install -y --no-install-recommends \ build-essential \ From 955708426ff2ac8f41cdeb8a559b8621d8b7f110 Mon Sep 17 00:00:00 2001 From: Eli Davidson Date: Fri, 24 Jan 2025 19:25:02 +0000 Subject: [PATCH 7/8] bump transformers to 4.48.0 --- huggingface/pytorch/inference/buildspec-neuronx.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/huggingface/pytorch/inference/buildspec-neuronx.yml 
b/huggingface/pytorch/inference/buildspec-neuronx.yml index 92a44fecc255..3f2b8cfcf63f 100644 --- a/huggingface/pytorch/inference/buildspec-neuronx.yml +++ b/huggingface/pytorch/inference/buildspec-neuronx.yml @@ -36,7 +36,7 @@ images: tag_python_version: &TAG_PYTHON_VERSION py310 neuron_sdk_version: &NEURON_SDK_VERSION sdk2.20.0 os_version: &OS_VERSION ubuntu22.04 - transformers_version: &TRANSFORMERS_VERSION 4.43.2 + transformers_version: &TRANSFORMERS_VERSION 4.48.0 tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION,"-", *NEURON_SDK_VERSION, '-', *OS_VERSION ] docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *NEURON_SDK_VERSION, /Dockerfile., *DEVICE_TYPE ] context: From 6640a08d00a39910283d60e3854abc05764f98ee Mon Sep 17 00:00:00 2001 From: Eli Davidson Date: Fri, 24 Jan 2025 19:53:38 +0000 Subject: [PATCH 8/8] bump optimum-neuron --- .../inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx index 6de0bff41f3b..d5c8919d9354 100644 --- a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx +++ b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx @@ -22,7 +22,7 @@ ARG NEURONX_TOOLS_VERSION=2.19.0.0 ARG TRANSFORMERS_VERSION ARG DIFFUSERS_VERSION=0.28.2 ARG HUGGINGFACE_HUB_VERSION=0.25.2 # For compatibility with diffusers 0.28.2 -ARG OPTIMUM_NEURON_VERSION=0.0.25 +ARG OPTIMUM_NEURON_VERSION=0.0.27 ARG SENTENCE_TRANSFORMERS=3.0.1 ARG PEFT_VERSION=0.12.0 ARG DATASETS_VERSION=2.19.0