From 3a796c223492bb91a62442b761c3a231a5736b8e Mon Sep 17 00:00:00 2001 From: Pranjal Chaturvedi Date: Sun, 12 Oct 2025 19:07:43 +0000 Subject: [PATCH] temp changes --- .../resources/default_bench_config.yml | 49 ++++++------ .../resources/fio_job_cases.csv | 19 ++++- .../resources/jobfile.fio | 14 ++-- .../resources/mount_config.yml | 8 +- .../resources/starter_script.sh | 74 +++++++++++++------ .../resources/version_details.yml | 4 +- .../starter-script.sh | 2 +- 7 files changed, 106 insertions(+), 64 deletions(-) diff --git a/gcsfuse-micro-benchmarking/resources/default_bench_config.yml b/gcsfuse-micro-benchmarking/resources/default_bench_config.yml index 294445a..eddae16 100644 --- a/gcsfuse-micro-benchmarking/resources/default_bench_config.yml +++ b/gcsfuse-micro-benchmarking/resources/default_bench_config.yml @@ -1,38 +1,31 @@ -zonal_benchmarking: True -reuse_same_mount: -iterations: 5 +zonal_benchmarking: False +reuse_same_mount: False +iterations: 3 bench_env: delete_after_use: True - zone: - project: + zone: us-central1-b + project: gcs-fuse-test gce_env: - vm_name: - machine_type: - image_family: - image_project: - disk_size: - startup_script: - gcs_bucket: - bucket_name: + vm_name: cpranjal-a4x-test + machine_type: + image_family: + image_project: + disk_size: + startup_script: resources/starter_script.sh + gcs_bucket: + bucket_name: # Leaving placement empty will create a regional bucket in the region of the zone. + placement: - storage_class: - enable_hns: + storage_class: + enable_hns: True -fio_jobfile_template: +fio_jobfile_template_path: resources/jobfile.fio job_details: - file_path: - bs: [] - file_size: [] - iotype: [] - iodepth: [] - threads: [] - nrfiles: [] - -mount_config_file: + file_path: resources/fio_job_cases.csv version_details: - go_version: - fio_version: - gcsfuse_version_or_commit: \ No newline at end of file + go_version: 1.22.0 + fio_version: 3.36 + gcsfuse_version_or_commit: v3.2.0 diff --git a/gcsfuse-micro-benchmarking/resources/fio_job_cases.csv b/gcsfuse-micro-benchmarking/resources/fio_job_cases.csv index e5d1aa8..25ce7c5 100644 --- a/gcsfuse-micro-benchmarking/resources/fio_job_cases.csv +++ b/gcsfuse-micro-benchmarking/resources/fio_job_cases.csv @@ -1,2 +1,19 @@ bs,file_size,iodepth,iotype,threads,nrfiles -4KB,1MB,1,read,1,1 +128K,128K,64,read,50,50 +128K,1M,64,read,50,50 +128K,100M,64,read,50,50 +128K,1G,64,read,50,10 +128K,10G,64,read,50,1 +1M,1M,64,read,50,50 +1M,100M,64,read,50,50 +1M,1G,64,read,50,10 +1M,10G,64,read,50,1 +128K,128K,64,read,96,50 +128K,1M,64,read,96,50 +128K,100M,64,read,96,50 +128K,1G,64,read,96,10 +128K,10G,64,read,96,1 +1M,1M,64,read,96,50 +1M,100M,64,read,96,50 +1M,1G,64,read,96,10 +1M,10G,64,read,96,1 \ No newline at end of file diff --git a/gcsfuse-micro-benchmarking/resources/jobfile.fio b/gcsfuse-micro-benchmarking/resources/jobfile.fio index cebf0cb..99be1c0 100644 --- a/gcsfuse-micro-benchmarking/resources/jobfile.fio +++ b/gcsfuse-micro-benchmarking/resources/jobfile.fio @@ -1,24 +1,24 @@ [global] allrandrepeat=0 create_serialize=0 -direct=1 +direct=1 # o-direct, no kernel call, directly to us fadvise_hint=0 file_service_type=random -group_reporting=1 +group_reporting=1 # combine all threads result into 1 iodepth=${IODEPTH} ioengine=libaio invalidate=1 openfiles=1 -rw=${IOTYPE} +rw=${IOTYPE} thread=1 -time_based=1 ramp_time=30s runtime=2m -filename_format=${FILENAME_FORMAT} +filename_format=file-cache-read/fs_${FILESIZE}/bs_${BLOCKSIZE}/nf_${NRFILES}/nj_${NUMJOBS}/experiment.$jobnum.$filenum -[experiment] +[read] +stonewall directory=${MNTDIR} bs=${BLOCKSIZE} filesize=${FILESIZE} nrfiles=${NRFILES} -numjobs=${NUMJOBS} \ No newline at end of file +numjobs=${NUMJOBS} diff --git a/gcsfuse-micro-benchmarking/resources/mount_config.yml b/gcsfuse-micro-benchmarking/resources/mount_config.yml index 87c9caf..68d52d9 100644 --- a/gcsfuse-micro-benchmarking/resources/mount_config.yml +++ b/gcsfuse-micro-benchmarking/resources/mount_config.yml @@ -1,8 +1,10 @@ +cache-dir: /mnt/lssd +file-cache: + cache-file-for-range-read: true + enable-parallel-downloads: true + max-size-mb: -1 metadata-cache: - negative-ttl-secs: 0 stat-cache-max-size-mb: -1 ttl-secs: -1 type-cache-max-size-mb: -1 implicit-dirs: true -write: - enable-streaming-writes: true \ No newline at end of file diff --git a/gcsfuse-micro-benchmarking/resources/starter_script.sh b/gcsfuse-micro-benchmarking/resources/starter_script.sh index 6552e8c..61cad8e 100644 --- a/gcsfuse-micro-benchmarking/resources/starter_script.sh +++ b/gcsfuse-micro-benchmarking/resources/starter_script.sh @@ -173,14 +173,14 @@ copy_resources_from_artifact_bucket(){ local artifacts_bucket=$1 local benchmark_id=$2 local dir=$3 - gcloud storage cp gs://$artifacts_bucket/$benchmark_id/* $dir/ + gcloud storage cp gs://$artifacts_bucket/$benchmark_id/* $dir/ < /dev/null } copy_raw_results_to_artifacts_bucket(){ local artifacts_bucket=$1 local benchmark_id=$2 local dir=$3 - gcloud storage cp --recursive $dir/raw-results/* gs://$artifacts_bucket/$benchmark_id/raw-results/ + gcloud storage cp --recursive $dir/raw-results/* gs://$artifacts_bucket/$benchmark_id/raw-results/ < /dev/null } # Install fio on the VM. @@ -386,6 +386,8 @@ mount_gcsfuse() { # Create the mount directory if it doesn't exist if [ ! -d "$mntdir" ]; then mkdir -p "$mntdir" + else + unmount_gcsfuse "$mntdir" # Ensure it's not mounted before we start fi # Check if the directory is already mounted @@ -393,16 +395,11 @@ mount_gcsfuse() { echo "Directory '$mntdir' is already a mount point. Skipping." return 0 fi - - # Check if the mount config file exists - if [ ! -f "$mount_config" ]; then - echo "Error: Mount config file not found at '$mount_config'." - return 1 - fi # Mount the GCS bucket using the config file - echo "Mounting bucket '$bucketname' to '$mntdir' with config '$mount_config'..." - "${gcsfuse_binary}" --config-file="$mount_config" "$bucketname" "$mntdir" + echo "Mounting bucket '$bucketname' to '$mntdir' using config file '$mount_config'..." + "$gcsfuse_binary" --config-file "$mount_config" \ + "$bucketname" "$mntdir" # Verify the mount was successful if mountpoint -q "$mntdir"; then @@ -475,14 +472,26 @@ start_benchmarking_runs() { echo "Error: mount config file not found at ${mount_config}" return 1 fi + + echo "--- DEBUG: Contents of fio_job_cases.csv ---" + cat "$fio_job_cases" + echo "--- END DEBUG ---" + + echo "--- DEBUG: Contents of jobfile.fio ---" + cat "$fio_job_file" + echo "--- END DEBUG ---" + + echo "--- DEBUG: Contents of mount_config.yml ---" + cat "$mount_config" + echo "--- END DEBUG ---" + # Mount the bucket once before the loop if reuse_same_mount is 'true' + if [[ "$reuse_same_mount" == "true" ]]; then + mount_gcsfuse "$mntdir" "$bucket" "$mount_config" + fi + # Read the CSV file line by line, skipping the header - tail -n +2 "$fio_job_cases" | while IFS=, read -r bs file_size iodepth iotype threads nrfiles; do - # Iterate for the specified number of runs for this job case - # Mount the bucket once before the loop if reuse_same_mount is 'true' - if [[ "$reuse_same_mount" == "true" ]]; then - mount_gcsfuse "$mntdir" "$bucket" "$mount_config" - fi + while IFS=, read -r bs file_size iodepth iotype threads nrfiles || [[ -n "$bs" ]]; do nrfiles="${nrfiles%$'\r'}" echo "Experiment config: ${bs}, ${file_size}, ${iodepth}, ${iotype}, ${threads}, ${nrfiles}" @@ -495,6 +504,20 @@ start_benchmarking_runs() { for ((i = 1; i <= iterations; i++)); do echo "Starting FIO run ${i} of ${iterations} for case: bs=${bs}, file_size=${file_size}, iodepth=${iodepth}, iotype=${iotype}, threads=${threads}, nrfiles=${nrfiles}" + + # Before each run, clear the GCSFuse cache directory if it exists + # to ensure a "cold cache" state for file-cache tests. + local cache_dir + cache_dir=$(/usr/local/bin/yq e '.cache-dir' "$mount_config") + # Also clear the log file from the previous run + sudo rm -f /tmp/gcsfuse.log + + if [ -d "$cache_dir" ]; then + echo "Clearing GCSFuse cache directory: ${cache_dir}" + sudo rm -rf "${cache_dir:?}"/{*,.gcsfuse_file_cache} 2>/dev/null || true + echo "File cache directory cleared at path: ${cache_dir}" + fi + # If reuse_same_mount is 'false', mount the bucket for this run if [[ "$reuse_same_mount" != "true" ]]; then mount_gcsfuse "$mntdir" "$bucket" "$mount_config" @@ -504,11 +527,18 @@ start_benchmarking_runs() { filename_format="${iotype}-\$jobnum/\$filenum" output_file="${testdir}/fio_output_iter${i}.json" - MNTDIR=${mntdir} IODEPTH=${iodepth} IOTYPE=${iotype} BLOCKSIZE=${bs} FILESIZE=${file_size} NRFILES=${nrfiles} NUMJOBS=${threads} FILENAME_FORMAT=${filename_format} ${fio_binary} $fio_job_file --output-format=json > "$output_file" 2>&1 + # Increase --alloc-size to pre-allocate more memory for fio to handle a large number of files. + # 2097152 KB = 2 GB. + MNTDIR=${mntdir} IODEPTH=${iodepth} IOTYPE=${iotype} BLOCKSIZE=${bs} FILESIZE=${file_size} NRFILES=${nrfiles} NUMJOBS=${threads} FILENAME_FORMAT=${filename_format} ${fio_binary} $fio_job_file --output-format=json --alloc-size=2097152 > "$output_file" < /dev/null end_time=$(date -u +"%Y-%m-%dT%H:%M:%S%z") echo "${i},${start_time},${end_time}" >> "$timestamps_file" + # # Print GCSFuse logs and save them for this iteration + # echo "--- GCSFuse logs for iteration ${i} ---" + # cat /tmp/gcsfuse.log || echo "Log file not found or empty." + # cp /tmp/gcsfuse.log "${testdir}/gcsfuse_iter${i}.log" + # If reuse_same_mount is 'false', unmount after this run if [[ "$reuse_same_mount" != "true" ]]; then unmount_gcsfuse "$mntdir" @@ -518,12 +548,12 @@ start_benchmarking_runs() { sleep 20 done - # Unmount the bucket once after the loop if reuse_same_mount is 'true' - if [[ "$reuse_same_mount" == "true" ]]; then - unmount_gcsfuse "$mntdir" - fi - done + done < <(tail -n +2 "$fio_job_cases") + # Unmount the bucket once after all loops if reuse_same_mount is 'true' + if [[ "$reuse_same_mount" == "true" ]]; then + unmount_gcsfuse "$mntdir" + fi } diff --git a/gcsfuse-micro-benchmarking/resources/version_details.yml b/gcsfuse-micro-benchmarking/resources/version_details.yml index 88d2c43..a30adfd 100644 --- a/gcsfuse-micro-benchmarking/resources/version_details.yml +++ b/gcsfuse-micro-benchmarking/resources/version_details.yml @@ -1,3 +1,3 @@ -go_version: 1.24.0 +go_version: 1.24.5 fio_version: 3.39 -gcsfuse_version_or_commit: master +gcsfuse_version_or_commit: v3.2.0 diff --git a/perf-benchmarking-for-releases/starter-script.sh b/perf-benchmarking-for-releases/starter-script.sh index 52c6d42..1ce0418 100755 --- a/perf-benchmarking-for-releases/starter-script.sh +++ b/perf-benchmarking-for-releases/starter-script.sh @@ -190,7 +190,7 @@ MOUNT_POINT="mount-point" CURR_DIR=$(pwd) GCSFUSE_BIN="$CURR_DIR/gcsfuse/gcsfuse" MNT="$CURR_DIR/$MOUNT_POINT" -SSD_MOUNT_DIR="/mnt/disks/local_ssd" +SSD_MOUNT_DIR="/mnt/lssd" FIO_JOB_DIR="/tmp/fio_jobs" # Download all FIO job spec files