diff --git a/veadk/agent.py b/veadk/agent.py
index 3197a4e8..00eea665 100644
--- a/veadk/agent.py
+++ b/veadk/agent.py
@@ -141,6 +141,9 @@ def model_post_init(self, __context: Any) -> None:
         if self.long_term_memory is not None:
            from google.adk.tools import load_memory

+            if not load_memory.custom_metadata:
+                load_memory.custom_metadata = {}
+            load_memory.custom_metadata["backend"] = self.long_term_memory.backend
             self.tools.append(load_memory)

         logger.info(f"VeADK version: {VERSION}")
diff --git a/veadk/tools/load_knowledgebase_tool.py b/veadk/tools/load_knowledgebase_tool.py
index b0718d0d..b087957b 100644
--- a/veadk/tools/load_knowledgebase_tool.py
+++ b/veadk/tools/load_knowledgebase_tool.py
@@ -25,6 +25,9 @@

 from veadk.knowledgebase import KnowledgeBase
 from veadk.knowledgebase.entry import KnowledgebaseEntry
+from veadk.utils.logger import get_logger
+
+logger = get_logger(__name__)

 if TYPE_CHECKING:
     from google.adk.models.llm_request import LlmRequest
@@ -96,6 +99,15 @@ class LoadKnowledgebaseTool(FunctionTool):

     def __init__(self):
         super().__init__(load_knowledgebase)
+        global knowledgebase
+        if knowledgebase is None:
+            logger.info(
+                "Failed to get the global knowledgebase instance; the knowledgebase tool backend will not be set."
+            )
+        else:
+            if not self.custom_metadata:
+                self.custom_metadata = {}
+            self.custom_metadata["backend"] = knowledgebase.backend

     @override
     def _get_declaration(self) -> types.FunctionDeclaration | None:
diff --git a/veadk/tracing/telemetry/exporters/apmplus_exporter.py b/veadk/tracing/telemetry/exporters/apmplus_exporter.py
index a7b142af..6486921a 100644
--- a/veadk/tracing/telemetry/exporters/apmplus_exporter.py
+++ b/veadk/tracing/telemetry/exporters/apmplus_exporter.py
@@ -126,6 +126,12 @@ class Meters:
         "gen_ai.chat_completions.streaming_time_per_output_token"
     )

+    # APMPlus metrics
+    # span duration
+    APMPLUS_SPAN_LATENCY = "apmplus_span_latency"
+    # tool token usage
+    APMPLUS_TOOL_TOKEN_USAGE = "apmplus_tool_token_usage"
+

 class MeterUploader:
     def __init__(
@@ -197,6 +203,20 @@ def __init__(
             explicit_bucket_boundaries_advisory=_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS,
         )

+        # APMPlus metrics for the VeADK dashboard
+        self.apmplus_span_latency = self.meter.create_histogram(
+            name=Meters.APMPLUS_SPAN_LATENCY,
+            description="Span latency",
+            unit="s",
+            explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
+        )
+        self.apmplus_tool_token_usage = self.meter.create_histogram(
+            name=Meters.APMPLUS_TOOL_TOKEN_USAGE,
+            description="Approximate token usage of tool calls",
+            unit="count",
+            explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
+        )
+
     def record_call_llm(
         self,
         invocation_context: InvocationContext,
@@ -207,7 +227,8 @@
         attributes = {
             "gen_ai_system": "volcengine",
             "gen_ai_response_model": llm_request.model,
-            "gen_ai_operation_name": "chat_completions",
+            "gen_ai_operation_name": "chat",
+            "gen_ai_operation_type": "llm",
             "stream": "false",
             "server_address": "api.volcengine.com",
         }  # required by Volcengine APMPlus
@@ -269,6 +290,17 @@
         #     time_per_output_token, attributes=attributes
         # )

+        # fetch the current span so its latency can be recorded
+        span = trace.get_current_span()
+        if not span:
+            return
+
+        # record span latency
+        if hasattr(span, "start_time") and self.apmplus_span_latency:
+            # span duration in seconds
+            duration = (time.time_ns() - span.start_time) / 1e9  # type: ignore
+            self.apmplus_span_latency.record(duration, attributes=attributes)
+
     def record_tool_call(
         self,
         tool: BaseTool,
@@ -276,6 +308,44 @@ def record_tool_call(
         function_response_event: Event,
     ):
         logger.debug(f"Record tool call work in progress. Tool: {tool.name}")
+        span = trace.get_current_span()
+        if not span:
+            return
+        operation_type = "tool"
+        operation_name = tool.name
+        operation_backend = ""
+        if tool.custom_metadata:
+            operation_backend = tool.custom_metadata.get("backend", "")
+
+        attributes = {
+            "gen_ai_operation_name": operation_name,
+            "gen_ai_operation_type": operation_type,
+            "gen_ai_operation_backend": operation_backend,
+        }
+
+        if hasattr(span, "start_time") and self.apmplus_span_latency:
+            # span duration in seconds
+            duration = (time.time_ns() - span.start_time) / 1e9  # type: ignore
+            self.apmplus_span_latency.record(duration, attributes=attributes)
+
+        if self.apmplus_tool_token_usage and hasattr(span, "attributes"):
+            tool_input = (span.attributes or {}).get("gen_ai.tool.input", "")
+            tool_token_usage_input = (
+                len(tool_input) / 4
+            )  # approximate token count: text length / 4
+            input_tool_token_attributes = {**attributes, "token_type": "input"}
+            self.apmplus_tool_token_usage.record(
+                tool_token_usage_input, attributes=input_tool_token_attributes
+            )
+
+            tool_output = (span.attributes or {}).get("gen_ai.tool.output", "")
+            tool_token_usage_output = (
+                len(tool_output) / 4
+            )  # approximate token count: text length / 4
+            output_tool_token_attributes = {**attributes, "token_type": "output"}
+            self.apmplus_tool_token_usage.record(
+                tool_token_usage_output, attributes=output_tool_token_attributes
+            )


 class APMPlusExporterConfig(BaseModel):
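
The tool-token metric above estimates token counts as text length divided by four, since tool payloads are plain strings with no tokenizer attached. Below is a minimal standalone sketch, not part of the patch, showing how that heuristic feeds an OpenTelemetry histogram and how the recorded value can be read back with an in-memory reader; the instrument name and attributes mirror the diff, while estimate_tokens is an illustrative helper.

# Sketch only: assumes opentelemetry-sdk is installed.
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader

reader = InMemoryMetricReader()
meter = MeterProvider(metric_readers=[reader]).get_meter("apmplus_sketch")

# Mirrors Meters.APMPLUS_TOOL_TOKEN_USAGE from the patch.
tool_token_usage = meter.create_histogram(
    name="apmplus_tool_token_usage",
    description="Approximate token usage of tool calls",
    unit="count",
)

def estimate_tokens(text: str) -> float:
    # Same heuristic as the patch: roughly 4 characters per token.
    return len(text) / 4

tool_input = '{"query": "veadk tracing"}'
tool_token_usage.record(
    estimate_tokens(tool_input),
    attributes={"gen_ai_operation_type": "tool", "token_type": "input"},
)

# Pull the recorded histogram point back out to confirm the value.
metrics_data = reader.get_metrics_data()
point = metrics_data.resource_metrics[0].scope_metrics[0].metrics[0].data.data_points[0]
print(point.sum)  # 6.5, i.e. len(tool_input) / 4

The same reader can be pointed at apmplus_span_latency to inspect recorded durations without a live APMPlus backend.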