From e1467aeaa7f5005ab33cd4cbebdc853e5ec7d9d8 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 29 Mar 2026 14:32:31 +0000 Subject: [PATCH] perf: optimize dataclass serialization for RequestMetrics and SpeculateMetrics Replaces the slow `dataclasses.asdict()` with custom `to_dict()` methods that explicitly iterate over fields and copy them. This avoids the recursive deepcopy overhead and significantly improves serialization performance on the hot path. Co-authored-by: ZeyuChen <1371212+ZeyuChen@users.noreply.github.com> --- .jules/bolt.md | 3 +++ fastdeploy/engine/request.py | 19 ++++++++++++++++++- fastdeploy/worker/output.py | 13 +++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 .jules/bolt.md diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 00000000000..a5d47a482d9 --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2026-03-29 - Optimize dataclass serialization for metrics +**Learning:** `dataclasses.asdict()` relies on recursive deepcopying which introduces significant overhead, especially for objects created and serialized frequently on the hot path (like `RequestMetrics` per request). +**Action:** Replace `asdict()` with manual `to_dict()` methods that iterate over `__dataclass_fields__` using `getattr()`. Explicitly copy primitives, shallow copy lists/dicts, and call `.to_dict()` on nested dataclasses (like `SpeculateMetrics`) to avoid deepcopy overhead while maintaining the correct dictionary structure. diff --git a/fastdeploy/engine/request.py b/fastdeploy/engine/request.py index 0e95cd5e1fb..ec08e84bd17 100644 --- a/fastdeploy/engine/request.py +++ b/fastdeploy/engine/request.py @@ -16,6 +16,7 @@ from __future__ import annotations +import dataclasses import json import time import traceback @@ -897,7 +898,23 @@ def to_dict(self): """ Convert the RequestMetrics object to a dictionary. 
""" - return {k: v for k, v in asdict(self).items()} + res = {} + for k in self.__dataclass_fields__: + v = getattr(self, k) + if type(v) in (int, float, str, bool, type(None)): + res[k] = v + elif dataclasses.is_dataclass(v): + if hasattr(v, "to_dict"): + res[k] = v.to_dict() + else: + res[k] = dataclasses.asdict(v) + elif isinstance(v, list): + res[k] = list(v) + elif isinstance(v, dict): + res[k] = dict(v) + else: + res[k] = v + return res def record_recv_first_token(self): cur_time = time.time() diff --git a/fastdeploy/worker/output.py b/fastdeploy/worker/output.py index 365fec12475..467ede05454 100644 --- a/fastdeploy/worker/output.py +++ b/fastdeploy/worker/output.py @@ -164,6 +164,19 @@ class SpeculateMetrics: """ accept_ratio_per_head: list[float] + def to_dict(self): + """ + convert SpeculateMetrics to a serialized dict + """ + return { + "accepted_tokens": self.accepted_tokens, + "rejected_tokens": self.rejected_tokens, + "accept_ratio": self.accept_ratio, + "average_accept_length": self.average_accept_length, + "accepted_tokens_per_head": list(self.accepted_tokens_per_head), + "accept_ratio_per_head": list(self.accept_ratio_per_head), + } + @dataclass class SamplerOutput: