Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ harness/mnist/__pycache__/
# Virtual environments
virtualenv

# Virtual environments
virtualenv
bmenv


# Datasets, queries and results since we generate them on the fly
datasets/*
# But keep the folder alive with this one file, since some submissions might have fixed datasets
Expand All @@ -59,6 +64,17 @@ datasets/*
harness/mnist/data/*
harness/mnist/mnist_ffnn_model.pth

**/build/

# Temporary files for loading CIFAR-10 data and model training
harness/cifar10/data/*
harness/cifar10/data/
harness/cifar10/cifar10_resnet20.pth
harness/cifar10/cifar10_resnet20_model.pth


# Remote-submission artifacts
ek.lpk
submission_remote/__pycache__/


Binary file not shown.
Binary file not shown.
Binary file added harness/cifar10/__pycache__/test.cpython-312.pyc
Binary file not shown.
Binary file added harness/cifar10/__pycache__/train.cpython-312.pyc
Binary file not shown.
10 changes: 7 additions & 3 deletions harness/generate_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,17 @@ def main():
Usage: python3 generate_dataset.py <output_file> <dataset_name>
"""

if len(sys.argv) != 2:
sys.exit("Usage: generate_dataset.py <output_file>")
if len(sys.argv) != 3:
sys.exit("Usage: generate_dataset.py <output_file> [dataset_name]")

DATASET_PATH = Path(sys.argv[1])
DATASET_NAME = sys.argv[2]
DATASET_PATH.parent.mkdir(parents=True, exist_ok=True)

mnist.export_test_data(output_file=DATASET_PATH, num_samples=10000, seed=None)
if DATASET_NAME == "mnist":
mnist.export_test_data(output_file=DATASET_PATH, num_samples=10000, seed=None)
else:
raise ValueError(f"Unsupported dataset name: {DATASET_NAME}")


if __name__ == "__main__":
Expand Down
18 changes: 11 additions & 7 deletions harness/generate_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,21 @@ def main():
"""
Generate random value representing the query in the workload.
"""
__, params, seed, __, __, __ = parse_submission_arguments('Generate input for FHE benchmark.')
__, params, seed, __, __, __,__, dataset_name = parse_submission_arguments('Generate input for FHE benchmark.')
PIXELS_PATH = params.get_test_input_file()
LABELS_PATH = params.get_ground_truth_labels_file()

PIXELS_PATH.parent.mkdir(parents=True, exist_ok=True)
num_samples = params.get_batch_size()
mnist.export_test_pixels_labels(
data_dir = params.datadir(),
pixels_file=PIXELS_PATH,
labels_file=LABELS_PATH,
num_samples=num_samples,
seed=seed)
if dataset_name == "mnist":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turn this into a switch statement in preparation for multiple models?

mnist.export_test_pixels_labels(
data_dir = params.datadir(),
pixels_file=PIXELS_PATH,
labels_file=LABELS_PATH,
num_samples=num_samples,
seed=seed)
else:
raise ValueError(f"Unsupported dataset name: {dataset_name}")

if __name__ == "__main__":
main()
4 changes: 3 additions & 1 deletion harness/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@ def subdir(self):
"""Return the submission directory of this repository."""
return self.rootdir

def datadir(self):
def datadir(self, dataset=None):
"""Return the dataset directory path."""
# if dataset:
# return self.rootdir / "datasets" / dataset / instance_name(self.size)
return self.rootdir / "datasets" / instance_name(self.size)

def dataset_intermediate_dir(self):
Expand Down
69 changes: 39 additions & 30 deletions harness/run_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,26 @@ def main():

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update the README to reflect the new python3 harness/run_submission.py -h.

# 0. Prepare running
# Get the arguments
size, params, seed, num_runs, clrtxt, remote_be = utils.parse_submission_arguments('Run ML Inference FHE benchmark.')
size, params, seed, num_runs, clrtxt, remote_be, model_name, dataset_name = utils.parse_submission_arguments('Run ML Inference FHE benchmark.')
test = instance_name(size)
print(f"\n[harness] Running submission for {test} inference")

# Ensure the required directories exist
utils.ensure_directories(params.rootdir)

# Build the submission if not built already
utils.build_submission(params.rootdir/"scripts", remote_be)

# The harness scripts are in the 'harness' directory,
# the submission code is either in submission or submission_remote
harness_dir = params.rootdir/"harness"
exec_dir = params.rootdir/ ("submission_remote" if remote_be else "submission")
exec_dir = params.rootdir/ ("submission_remote" if remote_be else "submissions")

# check whether the exec_dir contains a subdirectory matching the model name.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo

model_exec_dir = exec_dir / model_name
if not model_exec_dir.is_dir():
print(f"[harness]: Model directory {model_exec_dir} not found.")
sys.exit(1)

# Build the submission if not built already
utils.build_submission(params.rootdir/"scripts", model_name, remote_be)

# Remove and re-create IO directory
io_dir = params.iodir()
Expand All @@ -52,12 +58,16 @@ def main():

# 1. Client-side: Generate the test datasets
dataset_path = params.datadir() / f"dataset.txt"
utils.run_exe_or_python(harness_dir, "generate_dataset", str(dataset_path))
utils.log_step(1, "Test dataset generation")
dataset_args = (
str(dataset_path),
str(dataset_name),
)
utils.run_exe_or_python(harness_dir, "generate_dataset", *dataset_args)
utils.log_step(1, f"Harness: {dataset_name.upper()} Test dataset generation")

# 2.1 Communication: Get cryptographic context
if remote_be:
utils.run_exe_or_python(exec_dir, "server_get_params", str(size))
utils.run_exe_or_python(model_exec_dir, "server_get_params", str(size))
utils.log_step(2.1 , "Communication: Get cryptographic context")
# Report size of context
utils.log_size(io_dir / "client_data", "Cryptographic Context")
Expand All @@ -66,22 +76,23 @@ def main():
# Note: this does not use the rng seed above, it lets the implementation
# handle its own prg needs. It means that even if called with the same
# seed multiple times, the keys and ciphertexts will still be different.
utils.run_exe_or_python(exec_dir, "client_key_generation", str(size))
utils.log_step(2.2 , "Key Generation")
utils.run_exe_or_python(model_exec_dir, "client_key_generation", str(size))
utils.log_step(2.2 , "Client: Key Generation")
# Report size of keys and encrypted data
utils.log_size(io_dir / "public_keys", "Public and evaluation keys")
utils.log_size(io_dir / "public_keys", "Client: Public and evaluation keys")

# 2.3 Communication: Upload evaluation key
if remote_be:
utils.run_exe_or_python(exec_dir, "server_upload_ek", str(size))
utils.run_exe_or_python(model_exec_dir, "server_upload_ek", str(size))
utils.log_step(2.3 , "Communication: Upload evaluation key")

# 3. Server-side: Preprocess the (encrypted) dataset using exec_dir/server_preprocess_model
utils.run_exe_or_python(exec_dir, "server_preprocess_model")
utils.log_step(3, "Encrypted model preprocessing")
utils.run_exe_or_python(model_exec_dir, "server_preprocess_model")
utils.log_step(3, "Server: (Encrypted) model preprocessing")

# Run steps 4-10 multiple times if requested
for run in range(num_runs):
run_path = params.measuredir() / f"results-{run+1}.json"
if num_runs > 1:
print(f"\n [harness] Run {run+1} of {num_runs}")

Expand All @@ -93,30 +104,30 @@ def main():
genqry_seed = rng.integers(0,0x7fffffff)
cmd_args.extend(["--seed", str(genqry_seed)])
utils.run_exe_or_python(harness_dir, "generate_input", *cmd_args)
utils.log_step(4, "Input generation")
utils.log_step(4, f"Harness: Input generation for {dataset_name.upper()}")

# 5. Client-side: Preprocess input using exec_dir/client_preprocess_input
utils.run_exe_or_python(exec_dir, "client_preprocess_input", str(size))
utils.log_step(5, "Input preprocessing")
utils.run_exe_or_python(model_exec_dir, "client_preprocess_input", str(size))
utils.log_step(5, "Client: Input preprocessing")

# 6. Client-side: Encrypt the input
utils.run_exe_or_python(exec_dir, "client_encode_encrypt_input", str(size))
utils.log_step(6, "Input encryption")
utils.log_size(io_dir / "ciphertexts_upload", "Encrypted input")
utils.run_exe_or_python(model_exec_dir, "client_encode_encrypt_input", str(size))
utils.log_step(6, "Client: Input encryption")
utils.log_size(io_dir / "ciphertexts_upload", "Client: Encrypted input")

# 7. Server side: Run the encrypted processing run exec_dir/server_encrypted_compute
utils.run_exe_or_python(exec_dir, "server_encrypted_compute", str(size))
utils.log_step(7, "Encrypted computation")
utils.run_exe_or_python(model_exec_dir, "server_encrypted_compute", str(size))
utils.log_step(7, "Server: Encrypted ML Inference computation")
# Report size of encrypted results
utils.log_size(io_dir / "ciphertexts_download", "Encrypted results")
utils.log_size(io_dir / "ciphertexts_download", "Client: Encrypted results")

# 8. Client-side: decrypt
utils.run_exe_or_python(exec_dir, "client_decrypt_decode", str(size))
utils.log_step(8, "Result decryption")
utils.run_exe_or_python(model_exec_dir, "client_decrypt_decode", str(size))
utils.log_step(8, "Client: Result decryption")

# 9. Client-side: post-process
utils.run_exe_or_python(exec_dir, "client_postprocess", str(size))
utils.log_step(9, "Result postprocessing")
utils.run_exe_or_python(model_exec_dir, "client_postprocess", str(size))
utils.log_step(9, "Client: Result postprocessing")

# 10 Verify the result for single inference or calculate quality for batch inference.
encrypted_model_preds = params.get_encrypted_model_predictions_file()
Expand All @@ -141,10 +152,8 @@ def main():
utils.log_step(10.2, "Harness: Run quality check")

# 11. Store measurements
run_path = params.measuredir() / f"results-{run+1}.json"
run_path.parent.mkdir(parents=True, exist_ok=True)
submission_report_path = io_dir / "server_reported_steps.json"
utils.save_run(run_path, submission_report_path, size)
utils.save_run(run_path, size)

print(f"\nAll steps completed for the {instance_name(size)} inference!")

Expand Down
50 changes: 23 additions & 27 deletions harness/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
# Global variable to store model quality metrics
_model_quality = {}

def parse_submission_arguments(workload: str) -> Tuple[int, InstanceParams, int, int, int, bool]:
def parse_submission_arguments(workload: str) -> Tuple[int, InstanceParams, int, int, int, bool, str, str]:
"""
Get the arguments of the submission. Populate arguments as needed for the workload.
"""
Expand All @@ -50,6 +50,11 @@ def parse_submission_arguments(workload: str) -> Tuple[int, InstanceParams, int,
help='Specify with 1 if to rerun the cleartext computation')
parser.add_argument('--remote', action='store_true',
help='Run example submission in remote backend mode')
parser.add_argument('--model', default='mlp', type=str,
help='Pick a model run (default: mlp)')
parser.add_argument('--dataset', default='mnist', type=str,
help='Pick a dataset run (default: mnist)')


args = parser.parse_args()
size = args.size
Expand All @@ -58,21 +63,25 @@ def parse_submission_arguments(workload: str) -> Tuple[int, InstanceParams, int,
clrtxt = args.clrtxt
remote_be = args.remote

# adding model and dataset to the arguments
model_name = args.model.lower()
dataset_name = args.dataset.lower()

# Use params.py to get instance parameters
params = InstanceParams(size)
return size, params, seed, num_runs, clrtxt, remote_be
return size, params, seed, num_runs, clrtxt, remote_be, model_name, dataset_name

def ensure_directories(rootdir: Path):
""" Check that the current directory has sub-directories
'harness', 'scripts', and 'submission' """
required_dirs = ['harness', 'scripts', 'submission']
'harness', 'scripts', and 'submissions' """
required_dirs = ['harness', 'scripts', 'submissions']
for dir_name in required_dirs:
if not (rootdir / dir_name).exists():
print(f"Error: Required directory '{dir_name}'",
f"not found in {rootdir}")
sys.exit(1)

def build_submission(script_dir: Path, remote_be: bool):
def build_submission(script_dir: Path, model_name: str, remote_be: bool):
"""
Build the submission, including pulling dependencies as needed
"""
Expand All @@ -82,15 +91,14 @@ def build_submission(script_dir: Path, remote_be: bool):
# Clone and build OpenFHE if needed
subprocess.run([script_dir/"get_openfhe.sh"], check=True)
# CMake build of the submission itself
subprocess.run([script_dir/"build_task.sh", "./submission"], check=True)
subprocess.run([script_dir/"build_task.sh", f"./submissions/{model_name}"], check=True)

class TextFormat:
BOLD = "\033[1m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
BLUE = "\033[34m"
RED = "\033[31m"
PURPLE = "\033[35m"
RESET = "\033[0m"

def log_step(step_num: int, step_name: str, start: bool = False):
Expand Down Expand Up @@ -145,36 +153,24 @@ def human_readable_size(n: int):
n /= 1024
return f"{n:.1f}P"

def save_run(path: Path, submission_report_path: Path, size: int = 0):
def save_run(path: Path, size: int = 0):
global _timestamps
global _timestampsStr
global _bandwidth
global _model_quality

_timestampsStr["Total"] = f"{round(sum(_timestamps.values()), 4)}s"
_timestampsRemote = {}
if submission_report_path.exists():
with open(submission_report_path, "r") as f:
server_reported_times = json.load(f)
print(f"{TextFormat.GREEN} [submission] Server reported steps: {server_reported_times}{TextFormat.RESET}")
for step_name, time_str in server_reported_times.items():
_timestampsRemote[step_name] = f"{time_str}s"
print(f"{TextFormat.PURPLE} [submission] {step_name}: {time_str}s{TextFormat.RESET}")
else:
print(f"{TextFormat.PURPLE} [harness] Note: Submitters can specify Server reported steps file at {submission_report_path}{TextFormat.RESET}")

if size == 0:
json.dump({
"Timing": _timestampsStr,
"Bandwidth": _bandwidth,
"Server Reported": _timestampsRemote,
"total_latency_ms": round(sum(_timestamps.values()), 4),
"per_stage": _timestampsStr,
"bandwidth": _bandwidth,
}, open(path,"w"), indent=2)
else:
json.dump({
"Timing": _timestampsStr,
"Bandwidth": _bandwidth,
"Quality" : _model_quality,
"Server Reported": _timestampsRemote,
"total_latency_ms": round(sum(_timestamps.values()), 4),
"per_stage": _timestampsStr,
"bandwidth": _bandwidth,
"mnist_model_quality" : _model_quality,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just "model_quality"?

}, open(path,"w"), indent=2)

print("[total latency]", f"{round(sum(_timestamps.values()), 4)}s")
Expand Down
Loading