-
Notifications
You must be signed in to change notification settings - Fork 4
adding model and dataset flags to harness #18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
0df0c15
d01dbf4
75a71f4
ab3b1fe
cfb6da0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,20 +28,26 @@ def main(): | |
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update the README to reflect the new |
||
| # 0. Prepare running | ||
| # Get the arguments | ||
| size, params, seed, num_runs, clrtxt, remote_be = utils.parse_submission_arguments('Run ML Inference FHE benchmark.') | ||
| size, params, seed, num_runs, clrtxt, remote_be, model_name, dataset_name = utils.parse_submission_arguments('Run ML Inference FHE benchmark.') | ||
| test = instance_name(size) | ||
| print(f"\n[harness] Running submission for {test} inference") | ||
|
|
||
| # Ensure the required directories exist | ||
| utils.ensure_directories(params.rootdir) | ||
|
|
||
| # Build the submission if not built already | ||
| utils.build_submission(params.rootdir/"scripts", remote_be) | ||
|
|
||
| # The harness scripts are in the 'harness' directory, | ||
| # the submission code is either in submission or submission_remote | ||
| harness_dir = params.rootdir/"harness" | ||
| exec_dir = params.rootdir/ ("submission_remote" if remote_be else "submission") | ||
| exec_dir = params.rootdir/ ("submission_remote" if remote_be else "submissions") | ||
|
|
||
| # check whether the exec_dir contains a subdirectory equal to the model name. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo |
||
| model_exec_dir = exec_dir / model_name | ||
| if not model_exec_dir.is_dir(): | ||
| print(f"[harness]: Model directory {model_exec_dir} not found.") | ||
| sys.exit(1) | ||
|
|
||
| # Build the submission if not built already | ||
| utils.build_submission(params.rootdir/"scripts", model_name, remote_be) | ||
|
|
||
| # Remove and re-create IO directory | ||
| io_dir = params.iodir() | ||
|
|
@@ -52,12 +58,16 @@ def main(): | |
|
|
||
| # 1. Client-side: Generate the test datasets | ||
| dataset_path = params.datadir() / f"dataset.txt" | ||
| utils.run_exe_or_python(harness_dir, "generate_dataset", str(dataset_path)) | ||
| utils.log_step(1, "Test dataset generation") | ||
| dataset_args = ( | ||
| str(dataset_path), | ||
| str(dataset_name), | ||
| ) | ||
| utils.run_exe_or_python(harness_dir, "generate_dataset", *dataset_args) | ||
| utils.log_step(1, f"Harness: {dataset_name.upper()} Test dataset generation") | ||
|
|
||
| # 2.1 Communication: Get cryptographic context | ||
| if remote_be: | ||
| utils.run_exe_or_python(exec_dir, "server_get_params", str(size)) | ||
| utils.run_exe_or_python(model_exec_dir, "server_get_params", str(size)) | ||
| utils.log_step(2.1 , "Communication: Get cryptographic context") | ||
| # Report size of context | ||
| utils.log_size(io_dir / "client_data", "Cryptographic Context") | ||
|
|
@@ -66,22 +76,23 @@ def main(): | |
| # Note: this does not use the rng seed above, it lets the implementation | ||
| # handle its own prg needs. It means that even if called with the same | ||
| # seed multiple times, the keys and ciphertexts will still be different. | ||
| utils.run_exe_or_python(exec_dir, "client_key_generation", str(size)) | ||
| utils.log_step(2.2 , "Key Generation") | ||
| utils.run_exe_or_python(model_exec_dir, "client_key_generation", str(size)) | ||
| utils.log_step(2.2 , "Client: Key Generation") | ||
| # Report size of keys and encrypted data | ||
| utils.log_size(io_dir / "public_keys", "Public and evaluation keys") | ||
| utils.log_size(io_dir / "public_keys", "Client: Public and evaluation keys") | ||
|
|
||
| # 2.3 Communication: Upload evaluation key | ||
| if remote_be: | ||
| utils.run_exe_or_python(exec_dir, "server_upload_ek", str(size)) | ||
| utils.run_exe_or_python(model_exec_dir, "server_upload_ek", str(size)) | ||
| utils.log_step(2.3 , "Communication: Upload evaluation key") | ||
|
|
||
| # 3. Server-side: Preprocess the (encrypted) dataset using exec_dir/server_preprocess_model | ||
| utils.run_exe_or_python(exec_dir, "server_preprocess_model") | ||
| utils.log_step(3, "Encrypted model preprocessing") | ||
| utils.run_exe_or_python(model_exec_dir, "server_preprocess_model") | ||
| utils.log_step(3, "Server: (Encrypted) model preprocessing") | ||
|
|
||
| # Run steps 4-10 multiple times if requested | ||
| for run in range(num_runs): | ||
| run_path = params.measuredir() / f"results-{run+1}.json" | ||
| if num_runs > 1: | ||
| print(f"\n [harness] Run {run+1} of {num_runs}") | ||
|
|
||
|
|
@@ -93,30 +104,30 @@ def main(): | |
| genqry_seed = rng.integers(0,0x7fffffff) | ||
| cmd_args.extend(["--seed", str(genqry_seed)]) | ||
| utils.run_exe_or_python(harness_dir, "generate_input", *cmd_args) | ||
| utils.log_step(4, "Input generation") | ||
| utils.log_step(4, f"Harness: Input generation for {dataset_name.upper()}") | ||
|
|
||
| # 5. Client-side: Preprocess input using exec_dir/client_preprocess_input | ||
| utils.run_exe_or_python(exec_dir, "client_preprocess_input", str(size)) | ||
| utils.log_step(5, "Input preprocessing") | ||
| utils.run_exe_or_python(model_exec_dir, "client_preprocess_input", str(size)) | ||
| utils.log_step(5, "Client: Input preprocessing") | ||
|
|
||
| # 6. Client-side: Encrypt the input | ||
| utils.run_exe_or_python(exec_dir, "client_encode_encrypt_input", str(size)) | ||
| utils.log_step(6, "Input encryption") | ||
| utils.log_size(io_dir / "ciphertexts_upload", "Encrypted input") | ||
| utils.run_exe_or_python(model_exec_dir, "client_encode_encrypt_input", str(size)) | ||
| utils.log_step(6, "Client: Input encryption") | ||
| utils.log_size(io_dir / "ciphertexts_upload", "Client: Encrypted input") | ||
|
|
||
| # 7. Server side: Run the encrypted processing run exec_dir/server_encrypted_compute | ||
| utils.run_exe_or_python(exec_dir, "server_encrypted_compute", str(size)) | ||
| utils.log_step(7, "Encrypted computation") | ||
| utils.run_exe_or_python(model_exec_dir, "server_encrypted_compute", str(size)) | ||
| utils.log_step(7, "Server: Encrypted ML Inference computation") | ||
| # Report size of encrypted results | ||
| utils.log_size(io_dir / "ciphertexts_download", "Encrypted results") | ||
| utils.log_size(io_dir / "ciphertexts_download", "Client: Encrypted results") | ||
|
|
||
| # 8. Client-side: decrypt | ||
| utils.run_exe_or_python(exec_dir, "client_decrypt_decode", str(size)) | ||
| utils.log_step(8, "Result decryption") | ||
| utils.run_exe_or_python(model_exec_dir, "client_decrypt_decode", str(size)) | ||
| utils.log_step(8, "Client: Result decryption") | ||
|
|
||
| # 9. Client-side: post-process | ||
| utils.run_exe_or_python(exec_dir, "client_postprocess", str(size)) | ||
| utils.log_step(9, "Result postprocessing") | ||
| utils.run_exe_or_python(model_exec_dir, "client_postprocess", str(size)) | ||
| utils.log_step(9, "Client: Result postprocessing") | ||
|
|
||
| # 10 Verify the result for single inference or calculate quality for batch inference. | ||
| encrypted_model_preds = params.get_encrypted_model_predictions_file() | ||
|
|
@@ -141,10 +152,8 @@ def main(): | |
| utils.log_step(10.2, "Harness: Run quality check") | ||
|
|
||
| # 11. Store measurements | ||
| run_path = params.measuredir() / f"results-{run+1}.json" | ||
| run_path.parent.mkdir(parents=True, exist_ok=True) | ||
| submission_report_path = io_dir / "server_reported_steps.json" | ||
| utils.save_run(run_path, submission_report_path, size) | ||
| utils.save_run(run_path, size) | ||
|
|
||
| print(f"\nAll steps completed for the {instance_name(size)} inference!") | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,7 +34,7 @@ | |
| # Global variable to store model quality metrics | ||
| _model_quality = {} | ||
|
|
||
| def parse_submission_arguments(workload: str) -> Tuple[int, InstanceParams, int, int, int, bool]: | ||
| def parse_submission_arguments(workload: str) -> Tuple[int, InstanceParams, int, int, int, bool, str, str]: | ||
| """ | ||
| Get the arguments of the submission. Populate arguments as needed for the workload. | ||
| """ | ||
|
|
@@ -50,6 +50,11 @@ def parse_submission_arguments(workload: str) -> Tuple[int, InstanceParams, int, | |
| help='Specify with 1 if to rerun the cleartext computation') | ||
| parser.add_argument('--remote', action='store_true', | ||
| help='Run example submission in remote backend mode') | ||
| parser.add_argument('--model', default='mlp', type=str, | ||
| help='Pick a model run (default: mlp)') | ||
| parser.add_argument('--dataset', default='mnist', type=str, | ||
| help='Pick a dataset run (default: mnist)') | ||
|
|
||
|
|
||
| args = parser.parse_args() | ||
| size = args.size | ||
|
|
@@ -58,21 +63,25 @@ def parse_submission_arguments(workload: str) -> Tuple[int, InstanceParams, int, | |
| clrtxt = args.clrtxt | ||
| remote_be = args.remote | ||
|
|
||
| # adding model and dataset to the arguments | ||
| model_name = args.model.lower() | ||
| dataset_name = args.dataset.lower() | ||
|
|
||
| # Use params.py to get instance parameters | ||
| params = InstanceParams(size) | ||
| return size, params, seed, num_runs, clrtxt, remote_be | ||
| return size, params, seed, num_runs, clrtxt, remote_be, model_name, dataset_name | ||
|
|
||
| def ensure_directories(rootdir: Path): | ||
| """ Check that the current directory has sub-directories | ||
| 'harness', 'scripts', and 'submission' """ | ||
| required_dirs = ['harness', 'scripts', 'submission'] | ||
| 'harness', 'scripts', and 'submissions' """ | ||
| required_dirs = ['harness', 'scripts', 'submissions'] | ||
| for dir_name in required_dirs: | ||
| if not (rootdir / dir_name).exists(): | ||
| print(f"Error: Required directory '{dir_name}'", | ||
| f"not found in {rootdir}") | ||
| sys.exit(1) | ||
|
|
||
| def build_submission(script_dir: Path, remote_be: bool): | ||
| def build_submission(script_dir: Path, model_name: str, remote_be: bool): | ||
| """ | ||
| Build the submission, including pulling dependencies as needed | ||
| """ | ||
|
|
@@ -82,15 +91,14 @@ def build_submission(script_dir: Path, remote_be: bool): | |
| # Clone and build OpenFHE if needed | ||
| subprocess.run([script_dir/"get_openfhe.sh"], check=True) | ||
| # CMake build of the submission itself | ||
| subprocess.run([script_dir/"build_task.sh", "./submission"], check=True) | ||
| subprocess.run([script_dir/"build_task.sh", f"./submissions/{model_name}"], check=True) | ||
|
|
||
| class TextFormat: | ||
| BOLD = "\033[1m" | ||
| GREEN = "\033[32m" | ||
| YELLOW = "\033[33m" | ||
| BLUE = "\033[34m" | ||
| RED = "\033[31m" | ||
| PURPLE = "\033[35m" | ||
| RESET = "\033[0m" | ||
|
|
||
| def log_step(step_num: int, step_name: str, start: bool = False): | ||
|
|
@@ -145,36 +153,24 @@ def human_readable_size(n: int): | |
| n /= 1024 | ||
| return f"{n:.1f}P" | ||
|
|
||
| def save_run(path: Path, submission_report_path: Path, size: int = 0): | ||
| def save_run(path: Path, size: int = 0): | ||
| global _timestamps | ||
| global _timestampsStr | ||
| global _bandwidth | ||
| global _model_quality | ||
|
|
||
| _timestampsStr["Total"] = f"{round(sum(_timestamps.values()), 4)}s" | ||
| _timestampsRemote = {} | ||
| if submission_report_path.exists(): | ||
| with open(submission_report_path, "r") as f: | ||
| server_reported_times = json.load(f) | ||
| print(f"{TextFormat.GREEN} [submission] Server reported steps: {server_reported_times}{TextFormat.RESET}") | ||
| for step_name, time_str in server_reported_times.items(): | ||
| _timestampsRemote[step_name] = f"{time_str}s" | ||
| print(f"{TextFormat.PURPLE} [submission] {step_name}: {time_str}s{TextFormat.RESET}") | ||
| else: | ||
| print(f"{TextFormat.PURPLE} [harness] Note: Submitters can specify Server reported steps file at {submission_report_path}{TextFormat.RESET}") | ||
|
|
||
| if size == 0: | ||
| json.dump({ | ||
| "Timing": _timestampsStr, | ||
| "Bandwidth": _bandwidth, | ||
| "Server Reported": _timestampsRemote, | ||
| "total_latency_ms": round(sum(_timestamps.values()), 4), | ||
| "per_stage": _timestampsStr, | ||
| "bandwidth": _bandwidth, | ||
| }, open(path,"w"), indent=2) | ||
| else: | ||
| json.dump({ | ||
| "Timing": _timestampsStr, | ||
| "Bandwidth": _bandwidth, | ||
| "Quality" : _model_quality, | ||
| "Server Reported": _timestampsRemote, | ||
| "total_latency_ms": round(sum(_timestamps.values()), 4), | ||
| "per_stage": _timestampsStr, | ||
| "bandwidth": _bandwidth, | ||
| "mnist_model_quality" : _model_quality, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just "model_quality"? |
||
| }, open(path,"w"), indent=2) | ||
|
|
||
| print("[total latency]", f"{round(sum(_timestamps.values()), 4)}s") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Turn this into a switch statement in preparation for multiple models?