From e1467aeaa7f5005ab33cd4cbebdc853e5ec7d9d8 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sun, 29 Mar 2026 14:32:31 +0000
Subject: [PATCH] perf: optimize dataclass serialization for RequestMetrics and
 SpeculateMetrics

Replaces the slow `dataclasses.asdict()` with custom `to_dict()` methods
that explicitly iterate over fields and copy them. This avoids the recursive
deepcopy overhead and significantly improves serialization performance on the
hot path.

Co-authored-by: ZeyuChen <1371212+ZeyuChen@users.noreply.github.com>
---
 .jules/bolt.md               |  3 +++
 fastdeploy/engine/request.py | 19 ++++++++++++++++++-
 fastdeploy/worker/output.py  | 13 +++++++++++++
 3 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 .jules/bolt.md

diff --git a/.jules/bolt.md b/.jules/bolt.md
new file mode 100644
index 00000000000..a5d47a482d9
--- /dev/null
+++ b/.jules/bolt.md
@@ -0,0 +1,3 @@
+## 2025-03-29 - Optimize dataclass serialization for metrics
+**Learning:** `dataclasses.asdict()` relies on recursive deepcopying which introduces significant overhead, especially for objects created and serialized frequently on the hot path (like `RequestMetrics` per request).
+**Action:** Replace `asdict()` with manual `to_dict()` methods that iterate over `__dataclass_fields__` using `getattr()`. Explicitly copy primitives, shallow copy lists/dicts, and call `.to_dict()` on nested dataclasses (like `SpeculateMetrics`) to avoid deepcopy overhead while maintaining the correct dictionary structure.
diff --git a/fastdeploy/engine/request.py b/fastdeploy/engine/request.py
index 0e95cd5e1fb..ec08e84bd17 100644
--- a/fastdeploy/engine/request.py
+++ b/fastdeploy/engine/request.py
@@ -16,6 +16,7 @@
 
 from __future__ import annotations
 
+import dataclasses
 import json
 import time
 import traceback
@@ -897,7 +898,23 @@ def to_dict(self):
         """
         Convert the RequestMetrics object to a dictionary.
         """
-        return {k: v for k, v in asdict(self).items()}
+        res = {}
+        for k in self.__dataclass_fields__:
+            v = getattr(self, k)
+            if type(v) in (int, float, str, bool, type(None)):
+                res[k] = v
+            elif dataclasses.is_dataclass(v):
+                if hasattr(v, "to_dict"):
+                    res[k] = v.to_dict()
+                else:
+                    res[k] = dataclasses.asdict(v)
+            elif isinstance(v, list):
+                res[k] = list(v)
+            elif isinstance(v, dict):
+                res[k] = dict(v)
+            else:
+                res[k] = v
+        return res
 
     def record_recv_first_token(self):
         cur_time = time.time()
diff --git a/fastdeploy/worker/output.py b/fastdeploy/worker/output.py
index 365fec12475..467ede05454 100644
--- a/fastdeploy/worker/output.py
+++ b/fastdeploy/worker/output.py
@@ -164,6 +164,19 @@ class SpeculateMetrics:
     """
     accept_ratio_per_head: list[float]
 
+    def to_dict(self):
+        """
+        convert SpeculateMetrics to a serialized dict
+        """
+        return {
+            "accepted_tokens": self.accepted_tokens,
+            "rejected_tokens": self.rejected_tokens,
+            "accept_ratio": self.accept_ratio,
+            "average_accept_length": self.average_accept_length,
+            "accepted_tokens_per_head": list(self.accepted_tokens_per_head),
+            "accept_ratio_per_head": list(self.accept_ratio_per_head),
+        }
+
 
 @dataclass
 class SamplerOutput: