Merge remote-tracking branch 'origin/main' into 2025q1-rva23-new-build-config
llvm · Jan 25, 2025 · be344c2 · be344c2
2 parents dedaca3 + aa15ef9
commit be344c2
Show file tree

Hide file tree

Showing 11 changed files with 254 additions and 225 deletions.
diff --git a/buildbot/osuosl/master/config/builders.py b/buildbot/osuosl/master/config/builders.py
@@ -393,11 +393,11 @@
                 useTwoStage=True,
                 runTestSuite=True,
                 testsuite_flags=[
-                    '--cppflags', '-mcpu=cortex-a57 -fuse-ld=lld',
+                    '--cppflags', '-mcpu=neoverse-n1 -fuse-ld=lld',
                     '--threads=32', '--build-threads=32'],
                 extra_cmake_args=[
-                    "-DCMAKE_C_FLAGS='-mcpu=cortex-a57'",
-                    "-DCMAKE_CXX_FLAGS='-mcpu=cortex-a57'",
+                    "-DCMAKE_C_FLAGS='-mcpu=neoverse-n1'",
+                    "-DCMAKE_CXX_FLAGS='-mcpu=neoverse-n1'",
                     "-DLLVM_ENABLE_LLD=True"])},
 
     ## AArch64 run test-suite at -O0 (GlobalISel is now default).
@@ -425,11 +425,11 @@
                     useTwoStage=True,
                     runTestSuite=True,
                     testsuite_flags=[
-                        '--cppflags', '-mcpu=cortex-a57 -fuse-ld=lld',
+                        '--cppflags', '-mcpu=neoverse-n1 -fuse-ld=lld',
                         '--threads=32', '--build-threads=32'],
                     extra_cmake_args=[
-                        "-DCMAKE_C_FLAGS='-mcpu=cortex-a57'",
-                        "-DCMAKE_CXX_FLAGS='-mcpu=cortex-a57'",
+                        "-DCMAKE_C_FLAGS='-mcpu=neoverse-n1'",
+                        "-DCMAKE_CXX_FLAGS='-mcpu=neoverse-n1'",
                         "-DCOMPILER_RT_BUILD_SANITIZERS=OFF",
                         "-DLLVM_ENABLE_LLD=True",
                         # lld tests cause us to hit thread limits
@@ -452,11 +452,11 @@
                         'NO_STOP_MESSAGE':'1', # For Fortran test-suite
                     },
                     testsuite_flags=[
-                        '--cppflags', '-mcpu=cortex-a57',
+                        '--cppflags', '-mcpu=neoverse-n1',
                         '--threads=32', '--build-threads=32'],
                     extra_cmake_args=[
-                        "-DCMAKE_C_FLAGS='-mcpu=cortex-a57'",
-                        "-DCMAKE_CXX_FLAGS='-mcpu=cortex-a57'",
+                        "-DCMAKE_C_FLAGS='-mcpu=neoverse-n1'",
+                        "-DCMAKE_CXX_FLAGS='-mcpu=neoverse-n1'",
                         "-DLLVM_LIT_ARGS='-v'",
                         "-DMLIR_INCLUDE_INTEGRATION_TESTS=True",
                         "-DMLIR_RUN_ARM_SVE_TESTS=True",
@@ -1963,6 +1963,28 @@
                     script_interpreter=None
                 )},
 
+    {'name' : "amdgpu-offload-rhel-9-cmake-build-only",
+    'tags'  : ["openmp"],
+    'workernames' : ["rocm-docker-rhel-9"],
+    'builddir': "amdgpu-offload-rhel-9-cmake-build-only",
+    'factory' : AnnotatedBuilder.getAnnotatedBuildFactory(
+                    depends_on_projects=["llvm", "clang", "lld", "compiler-rt", "libcxx", "libcxxabi", "openmp", "offload", "libunwind"],
+                    script="amdgpu-offload-cmake.py",
+                    checkout_llvm_sources=True,
+                    script_interpreter=None
+                )},
+
+    {'name' : "amdgpu-offload-rhel-8-cmake-build-only",
+    'tags'  : ["amdgpu", "offload", "openmp"],
+    'workernames' : ["rocm-docker-rhel-8"],
+    'builddir': "amdgpu-offload-rhel-8-cmake-build-only",
+    'factory' : AnnotatedBuilder.getAnnotatedBuildFactory(
+                    depends_on_projects=["llvm", "clang", "lld", "compiler-rt", "libcxx", "libcxxabi", "offload", "openmp", "libunwind"],
+                    script="amdgpu-offload-cmake.py",
+                    checkout_llvm_sources=True,
+                    script_interpreter=None
+                )},
+
     {'name' : "openmp-offload-libc-amdgpu-runtime",
     'tags'  : ["openmp"],
     'workernames' : ["omp-vega20-1"],
@@ -3271,6 +3293,7 @@
                     util.Interpolate("-DLLVM_NATIVE_TOOL_DIR=%(prop:builddir)s/stage1.install/bin"),
                     "-DLLVM_BUILD_TESTS=True",
                     "-DPython3_EXECUTABLE=/usr/bin/python3",
+                    "-DLLVM_HOST_TRIPLE=riscv64-linux-gnu",
                     util.Interpolate("-DLLVM_EXTERNAL_LIT=%(prop:builddir)s/llvm-zorg/buildbot/riscv-rise/lit-on-qemu")],
                 stage2_toolchain_options=[
                     "set(CMAKE_SYSTEM_NAME Linux)",

diff --git a/buildbot/osuosl/master/config/status.py b/buildbot/osuosl/master/config/status.py
@@ -420,7 +420,8 @@ def getReporters():
                         "clang-hip-vega20",
                         "openmp-offload-amdgpu-runtime",
                         "openmp-offload-libc-amdgpu-runtime",
-                        "openmp-offload-sles-build-only"
+                        "openmp-offload-sles-build-only",
+                        "amdgpu-offload-ubuntu-22-cmake-build-only"
                     ])
             ]),
         reporters.MailNotifier(

diff --git a/buildbot/osuosl/master/config/workers.py b/buildbot/osuosl/master/config/workers.py
@@ -367,6 +367,8 @@ def get_all():
         create_worker("rocm-worker-hw-04-rhel-8_8", properties={'jobs': 32}, max_builds=1),
         # Containerized build-only, using llvm-project/offload/cmake/caches/AMDGPUbot.cmake
         create_worker("rocm-docker-ubu-22", properties={'jobs': 32}, max_builds=1),
+        create_worker("rocm-docker-rhel-9", properties={'jobs': 32}, max_builds=1),
+        create_worker("rocm-docker-rhel-8", properties={'jobs': 32}, max_builds=1),
 
         # AMD ROCm support, Ubuntu 18.04.6, AMD Ryzen @ 1.5 GHz, MI200 GPU
         create_worker("mi200-buildbot", max_builds=1),

diff --git a/buildbot/riscv-rise/lit-on-qemu b/buildbot/riscv-rise/lit-on-qemu
@@ -27,74 +27,91 @@ for var in ["BB_IMG_DIR", "BB_QEMU_CPU", "BB_QEMU_SMP", "BB_QEMU_MEM"]:
     if not os.getenv(var):
         error(f"{var} not set")
 
-# Create appropriate exec-on-boot script
-current_path = pathlib.Path.cwd()
-build_dir_name = current_path.name
-hgcomm_path = current_path / "hgcomm"
-
-if hgcomm_path.exists():
-    shutil.rmtree(hgcomm_path)
-hgcomm_path.mkdir()
-
-target_uid = 1000
-target_gid = 1000
-base_mount_path = current_path.parent
-
-args_string = " ".join(sys.argv[1:])
-exec_on_boot_content = f"""#!/bin/sh
-error() {{
-  printf "!!!!!!!!!! Error: %s !!!!!!!!!!\\n" "$*" >&2
-  exit 1
-}}
-mkdir -p "{base_mount_path}" || error "Can't make mount path"
-chown {target_uid}:{target_gid} "{base_mount_path}" || error "Chown failed"
-mount -t ext4 /dev/vdb "{base_mount_path}" || error "Mount failed"
-cd "{current_path}"
-su user -c "/usr/bin/python3 ./bin/llvm-lit {args_string}"
-"""
-exec_on_boot_path = hgcomm_path / "exec-on-boot"
-exec_on_boot_path.write_text(exec_on_boot_content)
-exec_on_boot_path.chmod(0o755)
-
 # Create ext4 filesystem containing the LLVM build directory and LLVM source
 # tree. Two layouts are supported:
 # 1) A typical layout used in local development, with build directories within
 # build/ in the monorepo checkout.
 # 2) The layout used by ClangBuilder.py in CI, with build directories as
 # siblings to the monorepo checkout (named 'llvm').
 print("@@@@@@@@@@ Creating ext4 filesystem with LLVM build directory @@@@@@@@")
-subprocess.run(["fallocate", "-l", "25GiB", "llvm-project.img"], check=True)
 
-if (current_path.parent.parent / ".git").is_dir():
+# Find the build directory. For 'check-all' it is the cwd, but for targets
+# such as 'check-llvm-foo' the working directory is changed to build_dir/test
+# before lit is invoked, so the build directory may be an ancestor of the cwd.
+build_dir = pathlib.Path.cwd()
+while build_dir != build_dir.parent:  # Stop at root
+    if (build_dir / '.ninja_deps').exists():
+        break
+    build_dir = build_dir.parent
+if not (build_dir / '.ninja_deps').exists():
+    error("Failed to find build directory")
+
+current_path = pathlib.Path.cwd()
+print(f"Initial working directory: {current_path}. Found build_dir: {build_dir}")
+
+llvm_img = build_dir / "llvm-project.img"
+subprocess.run(["fallocate", "-l", "50GiB", llvm_img], check=True)
+
+os.chdir(build_dir)
+build_dir_name = build_dir.name
+target_uid = 1000
+target_gid = 1000
+
+if (build_dir.parent.parent / ".git").is_dir():
     print("Note: 'Local dev' layout detected (build/build_dir nested in LLVM checkout)")
     extra_tar_args = [
         f"--exclude=build/{p.name} "
-        for p in current_path.parent.iterdir()
+        for p in build_dir.parent.iterdir()
         if p.is_dir() and p.name != build_dir_name
     ]
     extra_tar_args.append("--exclude=.git")
     extra_tar_args.append(f"--exclude=build/{build_dir_name}/llvm-project.img")
     paths_to_tar = "."
     change_to_dir = "../.."
-elif (current_path.parent / "llvm" / ".git").is_dir():
+    base_mount_path = build_dir.parent.parent
+elif (build_dir.parent / "llvm" / ".git").is_dir():
     print("Note: 'CI style' layout detected (llvm checkout and build_dir as siblings)")
     extra_tar_args = [
         "--exclude=llvm/.git",
-        f"--exclude={build_dir_name}/llvm-project.img"
+        f"--exclude={llvm_img}"
     ]
     paths_to_tar = f"llvm {build_dir_name}"
     change_to_dir = ".."
+    base_mount_path = build_dir.parent
 else:
     error("Unrecognized repo/build layout")
 
-parent_dir = current_path.parent
 tar_command = (
     f"tar --create --file=- --owner={target_uid} --group={target_gid} "
     f"{' '.join(extra_tar_args)} "
-    f"-C {change_to_dir} {paths_to_tar} | mkfs.ext4 -d - llvm-project.img"
+    f"-C {change_to_dir} {paths_to_tar} | mkfs.ext4 -d - {llvm_img}"
 )
+print(f"About to execute tar command: {tar_command}")
 subprocess.run(tar_command, shell=True, check=True)
 
+# Create appropriate exec-on-boot script
+hgcomm_path = build_dir / "hgcomm"
+
+if hgcomm_path.exists():
+    shutil.rmtree(hgcomm_path)
+hgcomm_path.mkdir()
+
+args_string = " ".join(sys.argv[1:])
+exec_on_boot_content = f"""#!/bin/sh
+error() {{
+  printf "!!!!!!!!!! Error: %s !!!!!!!!!!\\n" "$*" >&2
+  exit 1
+}}
+mkdir -p "{base_mount_path}" || error "Can't make mount path"
+chown {target_uid}:{target_gid} "{base_mount_path}" || error "Chown failed"
+mount -t ext4 /dev/vdb "{base_mount_path}" || error "Mount failed"
+cd "{current_path}"
+su user -c "/usr/bin/python3 {build_dir}/bin/llvm-lit {args_string}"
+"""
+exec_on_boot_path = hgcomm_path / "exec-on-boot"
+exec_on_boot_path.write_text(exec_on_boot_content)
+exec_on_boot_path.chmod(0o755)
+
 # Launch qemu-system appliance
 print("@@@@@@@@@@ Pivoting execution to qemu-system @@@@@@@@")
 # fmt: off
@@ -118,6 +135,7 @@ qemu_command = [
     "-append", "rw quiet root=/dev/vda console=ttyS0",
 ]
 # fmt: on
+print(f"About to execute qemu command: {' '.join(qemu_command)}")
 subprocess.run(qemu_command, check=True)
 print("@@@@@@@@@@ qemu-system execution finished @@@@@@@@")
 

diff --git a/premerge/linux_container_pod_template.yaml b/premerge/linux_container_pod_template.yaml
diff --git a/premerge/linux_runners_values.yaml b/premerge/linux_runners_values.yaml
@@ -4,17 +4,6 @@ githubConfigSecret: "github-token"
 minRunners: 0
 maxRunners: 4
 
-containerMode:
-  type: "kubernetes"
-  kubernetesModeWorkVolumeClaim:
-    accessModes: ["ReadWriteOnce"]
-    storageClassName: "standard-rwo"
-    resources:
-      requests:
-        storage: "200Gi"
-  kubernetesModeServiceAccount:
-    annotations:
-
 template:
   metadata:
     annotations:
@@ -29,49 +18,28 @@ template:
       premerge-platform: linux
     containers:
     - name: runner
-      image: ghcr.io/actions/actions-runner:latest
-      command: ["/home/runner/run.sh"]
+      image: ghcr.io/llvm/ci-ubuntu-22.04-agent:latest
+      command: ["/home/gha/actions-runner/run.sh"]
       resources:
-        # The container will be scheduled on the same node as this runner.
-        # This means if we don't set the CPU request high-enough here, 2
-        # containers will be scheduled on the same pod, meaning 2 jobs.
+        # If we don't set the CPU request high enough here, 2 runners might
+        # be scheduled on the same node, meaning 2 jobs, and they will starve
+        # each other.
         #
         # This number should be:
         #  - greater than number_of_cores / 2:
         #    A value lower than that could allow the scheduler to put 2
-        #    runners in the same pod. Meaning 2 containers in the same pod.
-        #    Meaning 2 jobs sharing the resources.
+        #    runners on the same node. Meaning 2 jobs sharing the resources of
+        #    a single node.
         #  - lower than number_of_cores:
         #    Each node has some basic services running (metrics, for example).
         #    Those already require some amount of CPU (~0.5). This means we
         #    don't exactly have N cores to allocate, but N - epsilon.
         #
-        # Memory however shall be handled at the container level. The runner
-        # itself doesn't need much, just using something enough not to get
-        # OOM killed.
+        # We also need to request sufficient memory to not get OOM killed.
         requests:
           cpu: 55
-          memory: "8Gi"
+          memory: "200Gi"
         limits:
           cpu: 64
-          memory: "8Gi"
-      env:
-        - name: ACTIONS_RUNNER_CONTAINER_HOOKS
-          value: /home/runner/k8s/index.js
-        - name: ACTIONS_RUNNER_POD_NAME
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.name
-        - name: ACTIONS_RUNNER_REQUIRE_JOB_CONTAINER
-          value: "true"
-        - name: ACTIONS_RUNNER_CONTAINER_HOOK_TEMPLATE
-          value: "/home/runner/pod-config/linux-container-pod-template.yaml"
-      volumeMounts:
-        - name: container-pod-config
-          mountPath: /home/runner/pod-config
-    securityContext:
-      fsGroup: 123
-    volumes:
-      - name: container-pod-config
-        configMap:
-          name: linux-container-pod-template
+          memory: "256Gi"
+