Skip to content

Commit 0fb25a4

Browse files
committed
upgrade 1.1
1 parent 635bac5 commit 0fb25a4

11 files changed

+54
-29
lines changed

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ If you are running this script, you will get this cloud resource just after 10 m
5858
- GCP CLI configured
5959
- Github SSH key
6060
- Dockerhub configured
61+
- Bash >= 4.2
6162
```
6263

6364
## 📂 Repository Structure
@@ -77,6 +78,7 @@ If you are running this script, you will get this cloud resource just after 10 m
7778
├── main.tf
7879
├── provider.tf
7980
├── storage.tf
81+
├── output.tf
8082
├── modules
8183
│ ├── vpc
8284
│ │ ├── main.tf

create_server_with_dynamic_zones.sh

+5-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ done
2222
for zone in "${zones[@]}"; do
2323
# 존(zone)에서 맨 뒤 두 글자를 제거하여 지역(region) 생성
2424
region=${zone::-2}
25+
if [[ -z "$zone" || "$zone" == "ZONE" ]]; then
26+
continue
27+
fi
2528

2629
echo "존(zone): $zone, 지역(region): $region 에서 make 명령어를 실행합니다..."
2730

@@ -34,8 +37,8 @@ for zone in "${zones[@]}"; do
3437

3538
# make 명령어의 종료 상태 확인
3639
if [ $? -ne 0 ]; then
37-
echo "$zone 에서 make 명령어가 실패했습니다. 10초 후에 make clean을 실행합니다..."
38-
sleep 30
40+
echo "$zone 에서 make 명령어가 실패했습니다. 100초 후에 make clean을 실행합니다..."
41+
sleep 100
3942
make clean
4043
else
4144
echo "$zone 에서 make 명령어가 성공적으로 완료되었습니다."

docs/SCRIPT_INFO.md

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ This script streamlines the process of deploying GCP Compute Engine instances eq
4646
├── main.tf # Terraform main config
4747
├── provider.tf # Terraform provider config
4848
├── storage.tf # Google cloud storage config
49+
├── output.tf # Google cloud storage config output
4950
├── modules # Terraform modules
5051
│ ├── vpc
5152
│ │ ├── main.tf

docs/USAGE.md

+10-2
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
| ssh_file | SSH public key path | string | "../.ssh/id_ed25519.pub" | yes |
99
| ssh_file_private | SSH private key path | string | "../.ssh/id_ed25519" | yes |
1010
| env_file | Environment file path | string | "../.env" | yes |
11-
| git_ssh_url | Git clone URL | string | "https://github.com/OptiMaps/TrainRepo" | yes |
12-
| git_clone_dir | Directory path for cloned repository | string | "TrainRepo" | yes |
1311
| credentials_file | GCP credentials file path | string | "../credentials.json" | yes |
1412
| project | GCP project name | string | "optimap-438115" | yes |
1513
| region | GCP region name | string | "asia-east1" | yes |
@@ -96,6 +94,11 @@ ACTIVE ACCOUNT
9694
> [!IMPORTANT]
9795
> Your credentials.json must be in the root directory and must keep that exact file name.
9896
97+
Set the project in the gcloud CLI:
98+
```bash
99+
gcloud config set project ${project-name}
100+
```
101+
99102
#### 3. set .ssh file
100103
In previous setting section, move root directory to create pub key and private key wrapping .ssh directory in chapter 4
101104
```bash
@@ -224,6 +227,11 @@ Open ./variables.tf, change default value (project, username) to your own enviro
224227
> [!CAUTION]
225228
> The project name must include the numbers that come after the dash (-), e.g. `optimap-438115`.
226229
230+
Set the project in the gcloud CLI:
231+
```bash
232+
gcloud config set project ${project-name}
233+
```
234+
227235
#### 4. create terraform.prod.tfvars
228236
Create `terraform.prod.tfvars` in the root directory with the following contents:
229237
```hcl

src/.terraform.lock.hcl

+19
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/main.tf

+3-2
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ module "training_worker" {
2929

3030
ssh_file = var.ssh_file
3131
ssh_file_private = var.ssh_file_private
32-
git_ssh_url = var.git_ssh_url
33-
git_clone_dir = var.git_clone_dir
3432
machine_type = var.machine_type
3533
gpu_type = var.gpu_type
3634
gpu_count = var.gpu_count
@@ -40,5 +38,8 @@ module "training_worker" {
4038
dockerhub_id = var.dockerhub_id
4139
dockerhub_pwd = var.dockerhub_pwd
4240

41+
# output 처리
42+
artifact_bucket = google_storage_bucket.artifact_bucket.name
43+
4344
depends_on = [ module.vpc_network, google_storage_bucket.artifact_bucket ]
4445
}

src/modules/worker/main.tf

+1-5
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,12 @@ resource "google_compute_instance" "gpu_instance" {
8585
"sudo apt update && sudo apt upgrade -y",
8686
"mkdir /home/${var.username}/gcs-bucket", // create mount path for our bucket
8787
"sudo chown ${var.username}: /home/${var.username}/gcs-bucket",
88-
"sudo gcsfuse -o allow_other -file-mode=777 -dir-mode=777 rl-artifact-bucket /home/${var.username}/gcs-bucket", // mount our bucket
88+
"sudo gcsfuse -o allow_other -file-mode=777 -dir-mode=777 ${var.artifact_bucket} /home/${var.username}/gcs-bucket", // mount our bucket
8989
"sudo /opt/deeplearning/install-driver.sh", // install required GPU drivers
9090
"sudo apt install -y git",
9191
"chmod 400 /home/${var.username}/.ssh/id_ed25519", // allow git to use our ssh key
9292
"echo 'Host github.com' >> ~/.ssh/config",
9393
"echo ' StrictHostKeyChecking no' >> ~/.ssh/config",
94-
"git clone ${var.git_ssh_url}", // clone our application repository
95-
"cd ~/${var.git_clone_dir}",
9694
"sudo apt remove docker docker-engine docker.io containerd runc", ## - Old Version Remove
9795
"sudo apt update && sudo apt upgrade -y", # set up
9896
"sudo apt install -y apt-transport-https ca-certificates curl software-properties-common",
@@ -103,8 +101,6 @@ resource "google_compute_instance" "gpu_instance" {
103101
"sudo systemctl start docker",
104102
"sudo systemctl enable docker",
105103
"echo ${var.dockerhub_pwd} | docker login -u ${var.dockerhub_id} --password-stdin", # dockerhub login
106-
"docker pull falconlee236/rl-image:parco-cuda123", # pull train docker image,
107-
"git clone https://github.com/OptiMaps/TrainRepo", # 새로 추가한 부분, 문제 있으면 이거 지우기
108104
]
109105
connection {
110106
type = "ssh"

src/modules/worker/variables.tf

+3-5
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,6 @@ variable "ssh_file" {}
22

33
variable "ssh_file_private" {}
44

5-
variable "git_ssh_url" {}
6-
7-
variable "git_clone_dir" {}
8-
95
variable "machine_type" {}
106

117
variable "gpu_type" {}
@@ -20,4 +16,6 @@ variable "env_file" {}
2016

2117
variable "dockerhub_id" {}
2218

23-
variable "dockerhub_pwd" {}
19+
variable "dockerhub_pwd" {}
20+
21+
variable "artifact_bucket" {}

src/output.tf

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# 무작위 ID 출력
2+
output "bucket_name" {
3+
value = google_storage_bucket.artifact_bucket.name
4+
}

src/storage.tf

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
1+
# 무작위 ID 생성
2+
resource "random_id" "bucket_suffix" {
3+
byte_length = 4
4+
}
5+
16
resource "google_storage_bucket" "artifact_bucket" {
27
# bucket name must be globally unique with all users
3-
name = "rl-artifact-bucket"
8+
name = "rl-artifact-bucket-${random_id.bucket_suffix.hex}"
49
location = var.region
510

611
force_destroy = true

src/variables.tf

-12
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,6 @@ variable "env_file" {
1616
default = "../.env"
1717
}
1818

19-
variable "git_ssh_url" {
20-
description = "git clone url"
21-
type = string
22-
default = "https://github.com/OptiMaps/TrainRepo"
23-
}
24-
25-
variable "git_clone_dir" {
26-
description = "directory path"
27-
type = string
28-
default = "TrainRepo"
29-
}
30-
3119
variable "credentials_file" {
3220
description = "credentials file"
3321
type = string

0 commit comments

Comments
 (0)