-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathcreate_sample_pcluster_config.sh
executable file
·197 lines (187 loc) · 6.54 KB
/
create_sample_pcluster_config.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#!/bin/bash
# This script creates a sample ParallelCluster config file to work with your OOD environment.
# It reads outputs from the OOD stack you have already deployed, so you need to have AWS_PROFILE or the access key environment variables set.
# The cluster will have two partitions defined: one for general workloads and one for interactive desktops.
# Please update the generated config file to suit your needs before using it.
# --- Command-line arguments -------------------------------------------------
#   $1  STACK_NAME  (required) name of the deployed OOD CloudFormation stack
#   $2  REGION      (optional) AWS region, defaults to us-east-1
#   $3  DOMAIN_1    (optional) first domain component, defaults to hpclab
#   $4  DOMAIN_2    (optional) second domain component, defaults to local
export STACK_NAME="${1:-}"
if [[ -z "$STACK_NAME" ]]; then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "Error: Stack name is required" >&2
  echo "Usage: ${0##*/} <stack-name> [region] [domain-1] [domain-2]" >&2
  exit 1
fi
export REGION="${2:-us-east-1}"
export DOMAIN_1="${3:-hpclab}"
export DOMAIN_2="${4:-local}"

# Name of the ParallelCluster config file generated in the current directory.
PCLUSTER_FILENAME="pcluster-config.yml"
# --- Look up the OOD stack and extract the outputs we need -------------------
# The stack description is fetched once: the same call both proves the stack
# exists and provides the JSON that the outputs are read from.
echo "[-] Checking if stack '$STACK_NAME' exists in region '$REGION'..."
if ! OOD_STACK=$(aws cloudformation describe-stacks \
  --stack-name "$STACK_NAME" --region "$REGION" 2>/dev/null); then
  echo "Error: Failed to describe stack '$STACK_NAME' in region '$REGION'. Please check your stack name and region." >&2
  exit 1
fi

echo "[-] Reading outputs from stack '$STACK_NAME' in region '$REGION'..."

#######################################
# Print the value of one CloudFormation stack output.
# Globals:   OOD_STACK (read) - JSON returned by describe-stacks
# Arguments: $1 - the OutputKey to look up
# Outputs:   the matching OutputValue on stdout (nothing if the key is absent)
#######################################
stack_output() {
  # The key is passed with --arg instead of being spliced into the filter.
  jq -r --arg key "$1" \
    '.Stacks[].Outputs[] | select(.OutputKey == $key) | .OutputValue' \
    <<< "$OOD_STACK"
}

AD_SECRET_ARN=$(stack_output "ADAdministratorSecretARN")
SUBNETS=$(stack_output "PrivateSubnets")
HEAD_SG=$(stack_output "HeadNodeSecurityGroup")
HEAD_POLICY=$(stack_output "HeadNodeIAMPolicyArn")
COMPUTE_SG=$(stack_output "ComputeNodeSecurityGroup")
COMPUTE_POLICY=$(stack_output "ComputeNodeIAMPolicyArn")
BUCKET_NAME=$(stack_output "ClusterConfigBucket")
LDAP_ENDPOINT=$(stack_output "LDAPNLBEndPoint")
MUNGEKEY_SECRET_ID=$(stack_output "MungeKeySecretId")

# Exported separately from the assignments above so that `export` does not
# hide the exit status of the command substitutions.
export OOD_STACK AD_SECRET_ARN SUBNETS HEAD_SG HEAD_POLICY COMPUTE_SG \
  COMPUTE_POLICY BUCKET_NAME LDAP_ENDPOINT MUNGEKEY_SECRET_ID
# Echo back every value that will be substituted into the generated config so
# the user can sanity-check them: one "NAME value" pair per line.
printf '[+] Using the following values to generate %s\n' "$PCLUSTER_FILENAME"
for var_name in DOMAIN_1 DOMAIN_2 STACK_NAME REGION AD_SECRET_ARN SUBNETS \
  HEAD_SG HEAD_POLICY COMPUTE_SG COMPUTE_POLICY BUCKET_NAME LDAP_ENDPOINT; do
  # ${!var_name} is indirect expansion: the value of the variable so named.
  printf '%s %s\n' "$var_name" "${!var_name}"
done
# This label is printed verbatim and differs from the variable's own name,
# so it cannot go through the loop above.
printf '%s %s\n' 'MUNGKEY_SECRET_ID' "$MUNGEKEY_SECRET_ID"
#######################################
# Split a comma-separated subnet list into the global array `subnets`.
# Spaces around the commas are ignored, so "a,b" and "a, b" give the same
# result. Only the first line of the input is considered.
# Globals:   subnets (written) - one subnet ID per element
# Arguments: $1 - comma-separated subnet IDs
# Outputs:   a warning on stderr when no subnet ID is found
#######################################
split_subnets() {
  # IFS is changed for this one `read` only; the shell's IFS is untouched.
  IFS=', ' read -r -a subnets <<< "$1"
  if (( ${#subnets[@]} == 0 )); then
    echo "Warning: no subnets found; the generated config will not contain any subnet IDs." >&2
  fi
}

split_subnets "$SUBNETS"
#######################################
# Print one Slurm queue entry (a list item under Scheduling/SlurmQueues).
# The queue's single compute resource is named "<queue>-cr".
# Globals:   subnets (array), COMPUTE_SG, COMPUTE_POLICY, BUCKET_NAME,
#            STACK_NAME (all read)
# Arguments: $1 - queue name
#            $2 - EC2 instance type of the compute resource
#            $3 - MaxCount of the compute resource
#            $4 - file name of the OnNodeConfigured script in the bucket
# Outputs:   the YAML fragment on stdout
#######################################
emit_slurm_queue() {
  local queue_name=$1 instance_type=$2 max_count=$3 node_script=$4
  local subnet

  cat << EOF
    - Name: ${queue_name}
      AllocationStrategy: lowest-price
      ComputeResources:
        - Name: ${queue_name}-cr
          Instances:
            - InstanceType: ${instance_type}
          MinCount: 0
          MaxCount: ${max_count}
      Networking:
        SubnetIds:
EOF
  # A loop rather than a single printf, so an empty array prints no items.
  for subnet in "${subnets[@]}"; do
    printf '          - %s\n' "$subnet"
  done
  cat << EOF
        AdditionalSecurityGroups:
          - $COMPUTE_SG
      ComputeSettings:
        LocalStorage:
          RootVolume:
            VolumeType: gp3
            Size: 50
      CustomActions:
        OnNodeConfigured:
          Script: >-
            s3://$BUCKET_NAME/${node_script}
          Args:
            - $STACK_NAME
      Iam:
        AdditionalIamPolicies:
          - Policy: $COMPUTE_POLICY
          - Policy: arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess
EOF
}

#######################################
# Write the sample ParallelCluster configuration (YAML) to a file.
# The file holds a head node, two Slurm queues ("general" and "desktop"),
# one login-node pool and the directory-service settings. The head node and
# the login pool use the first subnet; the queues use all of them.
# Globals:   subnets (array), HEAD_SG, HEAD_POLICY, COMPUTE_SG,
#            COMPUTE_POLICY, BUCKET_NAME, STACK_NAME, MUNGEKEY_SECRET_ID,
#            REGION, DOMAIN_1, DOMAIN_2, LDAP_ENDPOINT, AD_SECRET_ARN (read)
# Arguments: $1 - path of the file to create or overwrite
# Returns:   non-zero if the file cannot be written
#######################################
write_pcluster_config() {
  local out_file=$1

  # One redirection for the whole document: the file is opened once and
  # every fragment below lands in it in order.
  {
    cat << EOF
HeadNode:
  InstanceType: c5.large
  Networking:
    SubnetId: ${subnets[0]}
    AdditionalSecurityGroups:
      - $HEAD_SG
  LocalStorage:
    RootVolume:
      VolumeType: gp3
      Size: 50
  Iam:
    AdditionalIamPolicies:
      - Policy: arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
      - Policy: arn:aws:iam::aws:policy/AmazonS3FullAccess
      - Policy: arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess
      - Policy: $HEAD_POLICY
  CustomActions:
    OnNodeConfigured:
      Script: >-
        s3://$BUCKET_NAME/pcluster_head_node.sh
      Args:
        - $STACK_NAME
Scheduling:
  Scheduler: slurm
  SlurmSettings:
    MungeKeySecretArn: $MUNGEKEY_SECRET_ID
  SlurmQueues:
EOF
    emit_slurm_queue "general" "c5n.large" 4 "pcluster_worker_node.sh"
    emit_slurm_queue "desktop" "c5n.2xlarge" 10 "pcluster_worker_node_desktop.sh"
    cat << EOF
LoginNodes:
  Pools:
    - Name: login
      Count: 1
      InstanceType: c5.large
      Networking:
        SubnetIds:
          - ${subnets[0]}
        AdditionalSecurityGroups:
          - $COMPUTE_SG
      CustomActions:
        OnNodeConfigured:
          Script: >-
            s3://$BUCKET_NAME/configure_login_nodes.sh
          Args:
            - $STACK_NAME
      Iam:
        AdditionalIamPolicies:
          - Policy: arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
          - Policy: arn:aws:iam::aws:policy/AmazonS3FullAccess
          - Policy: arn:aws:iam::aws:policy/AmazonEC2ReadOnlyAccess
          - Policy: arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess
Region: $REGION
Image:
  Os: alinux2
DirectoryService:
  DomainName: $DOMAIN_1.$DOMAIN_2
  DomainAddr: ldap://$LDAP_ENDPOINT
  PasswordSecretArn: $AD_SECRET_ARN
  DomainReadOnlyUser: cn=Admin,ou=Users,ou=$DOMAIN_1,dc=$DOMAIN_1,dc=$DOMAIN_2
  AdditionalSssdConfigs:
    override_homedir: /shared/home/%u
    ldap_auth_disable_tls_never_use_in_production: true
EOF
  } > "$out_file"
}

echo "[-] Buildng $PCLUSTER_FILENAME..."
write_pcluster_config "$PCLUSTER_FILENAME"