From 6f6b208980cd8231af53112270e5bd9743d8d390 Mon Sep 17 00:00:00 2001
From: Xin Wang
Date: Mon, 4 Aug 2025 07:03:49 +0000
Subject: [PATCH 1/2] Add HyperpodTrainingOperatorServiceRole in CF template

---
 .../hyperpod-eks-full-stack.yaml | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

NOTE(review): this patch adds an IAM role that the EKS Pod Identity service
principal (pods.eks.amazonaws.com) may assume, with a single inline policy
allowing sagemaker:DescribeClusterNode on clusters in the stack's own
account and region.
NOTE(review): leading whitespace was lost in the copy this was restored
from; the indentation of context lines is reconstructed. Verify against the
target file, or apply with `git apply --ignore-whitespace`.
NOTE(review): RoleName is a fixed literal. Presumably a second stack in the
same account (any region) would fail on a name collision -- confirm whether
a stack/region-specific suffix is wanted.

diff --git a/1.architectures/7.sagemaker-hyperpod-eks/cfn-templates/hyperpod-eks-full-stack.yaml b/1.architectures/7.sagemaker-hyperpod-eks/cfn-templates/hyperpod-eks-full-stack.yaml
index 432ad528d..8f870386b 100644
--- a/1.architectures/7.sagemaker-hyperpod-eks/cfn-templates/hyperpod-eks-full-stack.yaml
+++ b/1.architectures/7.sagemaker-hyperpod-eks/cfn-templates/hyperpod-eks-full-stack.yaml
@@ -550,6 +550,35 @@ Resources:
       ResolveConflicts: OVERWRITE

 ### ---------------- SageMaker Execution and Service Roles ----------------###
+  HyperpodTrainingOperatorServiceRole:
+    Type: 'AWS::IAM::Role'
+    Condition: CreateEKSCluster
+    Properties:
+      RoleName: 'hyperpod-training-operator-service-role'
+      AssumeRolePolicyDocument:
+        Version: '2012-10-17'
+        Statement:
+          - Sid: AllowEksAuthToAssumeRoleForPodIdentity
+            Effect: Allow
+            Principal:
+              Service: pods.eks.amazonaws.com
+            Action:
+              - 'sts:AssumeRole'
+              - 'sts:TagSession'
+      Path: /
+      Policies:
+        - PolicyName: 'hyperpod-training-operator-service-policy'
+          PolicyDocument:
+            Version: '2012-10-17'
+            Statement:
+              - Effect: Allow
+                Action:
+                  - 'sagemaker:DescribeClusterNode'
+                Resource: !Sub 'arn:aws:sagemaker:${AWS::Region}:${AWS::AccountId}:cluster/*'
+      Tags:
+        - Key: Name
+          Value: 'hyperpod-training-operator-service-role'
+
   ExecutionRole:
     Type: 'AWS::IAM::Role'
     Properties:
From 33ecc700258dc055f401b29ef5396819ec16d910 Mon Sep 17 00:00:00 2001
From: Xin Wang
Date: Thu, 7 Aug 2025 21:21:20 +0000
Subject: [PATCH 2/2] Add HPTO service role in nested CF template

---
 .../sagemaker-iam-role-stack.yaml | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

NOTE(review): same role definition as PATCH 1/2, added to the nested IAM
stack. The neighbouring role in this template is named
'${ResourceNamePrefix}-SMHP-Exec-Role-${AWS::Region}', while this one uses a
fixed literal -- confirm whether it should follow the same convention.
NOTE(review): the role is gated on Condition CreateEKSCluster, whose
declaration is not visible in this hunk -- confirm the nested template
declares it under Conditions.
NOTE(review): indentation of context lines is reconstructed (see PATCH 1/2).

diff --git a/1.architectures/7.sagemaker-hyperpod-eks/cfn-templates/nested-stacks/sagemaker-iam-role-stack.yaml b/1.architectures/7.sagemaker-hyperpod-eks/cfn-templates/nested-stacks/sagemaker-iam-role-stack.yaml
index 3e58f3567..6fdc6f627 100644
--- a/1.architectures/7.sagemaker-hyperpod-eks/cfn-templates/nested-stacks/sagemaker-iam-role-stack.yaml
+++ b/1.architectures/7.sagemaker-hyperpod-eks/cfn-templates/nested-stacks/sagemaker-iam-role-stack.yaml
@@ -70,6 +70,35 @@ Resources:
                   - !Sub 'arn:aws:s3:::${S3BucketName}/*'
       RoleName: !Sub '${ResourceNamePrefix}-SMHP-Exec-Role-${AWS::Region}'

+  HyperpodTrainingOperatorServiceRole:
+    Type: 'AWS::IAM::Role'
+    Condition: CreateEKSCluster
+    Properties:
+      RoleName: 'hyperpod-training-operator-service-role'
+      AssumeRolePolicyDocument:
+        Version: '2012-10-17'
+        Statement:
+          - Sid: AllowEksAuthToAssumeRoleForPodIdentity
+            Effect: Allow
+            Principal:
+              Service: pods.eks.amazonaws.com
+            Action:
+              - 'sts:AssumeRole'
+              - 'sts:TagSession'
+      Path: /
+      Policies:
+        - PolicyName: 'hyperpod-training-operator-service-policy'
+          PolicyDocument:
+            Version: '2012-10-17'
+            Statement:
+              - Effect: Allow
+                Action:
+                  - 'sagemaker:DescribeClusterNode'
+                Resource: !Sub 'arn:aws:sagemaker:${AWS::Region}:${AWS::AccountId}:cluster/*'
+      Tags:
+        - Key: Name
+          Value: 'hyperpod-training-operator-service-role'
+
 Outputs:
   SageMakerIAMRoleName:
     Description: 'SageMaker IAM role Name'