Skip to content

Commit 726487a

Browse files
committed
fix: per‑key traffic fields
1 parent 7f3c562 commit 726487a

File tree

5 files changed

+133
-1
lines changed

5 files changed

+133
-1
lines changed

packages/core/src/agent/agent.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,22 @@ export interface BaseGenerationOptions extends Partial<CallSettings> {
270270
userId?: string;
271271
conversationId?: string;
272272
tenantId?: string;
273+
/**
274+
* Optional key metadata for per-key rate limits.
275+
*/
276+
apiKeyId?: string;
277+
/**
278+
* Optional region metadata for per-region rate limits.
279+
*/
280+
region?: string;
281+
/**
282+
* Optional endpoint metadata for per-endpoint rate limits.
283+
*/
284+
endpoint?: string;
285+
/**
286+
* Optional tenant tier metadata for per-tier rate limits.
287+
*/
288+
tenantTier?: string;
273289
context?: ContextInput;
274290
elicitation?: (request: unknown) => Promise<unknown>;
275291
/**
@@ -2336,6 +2352,10 @@ export class Agent {
23362352
const startTimeDate = new Date();
23372353
const priority = this.resolveTrafficPriority(options);
23382354
const tenantId = this.resolveTenantId(options);
2355+
const apiKeyId = options?.apiKeyId ?? options?.parentOperationContext?.apiKeyId;
2356+
const region = options?.region ?? options?.parentOperationContext?.region;
2357+
const endpoint = options?.endpoint ?? options?.parentOperationContext?.endpoint;
2358+
const tenantTier = options?.tenantTier ?? options?.parentOperationContext?.tenantTier;
23392359

23402360
// Prefer reusing an existing context instance to preserve reference across calls/subagents
23412361
const runtimeContext = toContextMap(options?.context);
@@ -2445,6 +2465,10 @@ export class Agent {
24452465
userId: options?.userId,
24462466
conversationId: options?.conversationId,
24472467
tenantId,
2468+
apiKeyId,
2469+
region,
2470+
endpoint,
2471+
tenantTier,
24482472
parentAgentId: options?.parentAgentId,
24492473
traceContext,
24502474
startTime: startTimeDate,
@@ -4152,6 +4176,10 @@ export class Agent {
41524176
this.resolveProvider(this.model) ??
41534177
undefined;
41544178
const priority = this.resolveTrafficPriority(options);
4179+
const apiKeyId = options?.apiKeyId ?? options?.parentOperationContext?.apiKeyId;
4180+
const region = options?.region ?? options?.parentOperationContext?.region;
4181+
const endpoint = options?.endpoint ?? options?.parentOperationContext?.endpoint;
4182+
const tenantTier = options?.tenantTier ?? options?.parentOperationContext?.tenantTier;
41554183

41564184
return {
41574185
agentId: this.id, // Identify which agent issued the request
@@ -4160,6 +4188,10 @@ export class Agent {
41604188
provider, // Allows per-provider throttling later
41614189
priority,
41624190
tenantId: this.resolveTenantId(options),
4191+
apiKeyId,
4192+
region,
4193+
endpoint,
4194+
tenantTier,
41634195
taskType: options?.taskType,
41644196
fallbackPolicyId: options?.fallbackPolicyId,
41654197
};

packages/core/src/agent/types.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,18 @@ export type OperationContext = {
900900
/** Optional tenant identifier propagated across nested operations */
901901
tenantId?: string;
902902

903+
/** Optional key identifier for per-key traffic limits */
904+
apiKeyId?: string;
905+
906+
/** Optional region identifier for per-region traffic limits */
907+
region?: string;
908+
909+
/** Optional endpoint identifier for per-endpoint traffic limits */
910+
endpoint?: string;
911+
912+
/** Optional tenant tier identifier for per-tier traffic limits */
913+
tenantTier?: string;
914+
903915
/** User-managed context map for this operation */
904916
readonly context: Map<string | symbol, unknown>;
905917

packages/core/src/traffic/traffic-circuit-breaker.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,12 +359,19 @@ export class TrafficCircuitBreaker {
359359
): void {
360360
next.request = fallbackRequest;
361361
next.attempt = 1;
362+
next.estimatedTokens = fallbackRequest.estimatedTokens;
363+
next.reservedTokens = undefined;
362364
next.tenantConcurrencyKey = undefined;
363365
next.providerModelConcurrencyKey = undefined;
364366
next.rateLimitKey = undefined;
365367
next.etaMs = undefined;
366368
next.circuitKey = undefined;
367369
next.circuitStatus = undefined;
370+
next.extractUsage = fallbackRequest.extractUsage;
371+
if (context?.reason === "queue-timeout") {
372+
next.enqueuedAt = Date.now();
373+
next.dispatchedAt = undefined;
374+
}
368375
logger?.debug?.("Switched to fallback request", {
369376
previousCircuitKey: context?.previousCircuitKey,
370377
fallbackModel: fallback,

packages/core/src/traffic/traffic-controller.spec.ts

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,3 +488,84 @@ describe("TrafficController stream reporting", () => {
488488
expect(order).toEqual(["fallback"]);
489489
});
490490
});
491+
492+
describe("TrafficController queue timeouts", () => {
493+
it("lets fallback requests wait after queue timeout without rejecting", async () => {
494+
vi.useFakeTimers();
495+
496+
try {
497+
vi.setSystemTime(new Date(0));
498+
const controller = new TrafficController({
499+
maxConcurrent: 1,
500+
fallbackChains: {
501+
"p::m": ["m-fallback"],
502+
},
503+
});
504+
const order: string[] = [];
505+
let releaseFirst!: () => void;
506+
const firstGate = new Promise<void>((resolve) => {
507+
releaseFirst = resolve;
508+
});
509+
510+
const first = controller.handleText({
511+
tenantId: "tenant-a",
512+
metadata: { provider: "p", model: "m", priority: "P1" },
513+
execute: async () => {
514+
order.push("first");
515+
await firstGate;
516+
return "first";
517+
},
518+
});
519+
520+
const second = controller.handleText({
521+
tenantId: "tenant-a",
522+
metadata: { provider: "p", model: "m", priority: "P1" },
523+
maxQueueWaitMs: 1,
524+
execute: async () => {
525+
order.push("primary");
526+
return "primary";
527+
},
528+
createFallbackRequest: (target) => ({
529+
tenantId: "tenant-a",
530+
metadata: {
531+
provider: "p",
532+
model: typeof target === "string" ? target : target.model,
533+
priority: "P1",
534+
},
535+
maxQueueWaitMs: 1,
536+
execute: async () => {
537+
order.push("fallback");
538+
return "fallback";
539+
},
540+
}),
541+
});
542+
543+
await Promise.resolve();
544+
expect(order).toEqual(["first"]);
545+
546+
await vi.advanceTimersByTimeAsync(2);
547+
548+
const third = controller.handleText({
549+
tenantId: "tenant-a",
550+
metadata: { provider: "p", model: "other", priority: "P1" },
551+
execute: async () => {
552+
order.push("third");
553+
return "third";
554+
},
555+
});
556+
557+
await Promise.resolve();
558+
expect(order).toEqual(["first"]);
559+
560+
releaseFirst();
561+
await vi.runAllTimersAsync();
562+
563+
await expect(second).resolves.toBe("fallback");
564+
await Promise.all([first, third]);
565+
566+
expect(order).toEqual(["first", "fallback", "third"]);
567+
} finally {
568+
vi.useRealTimers();
569+
}
570+
});
571+
});

packages/core/src/traffic/traffic-controller.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -857,7 +857,7 @@ export class TrafficController {
857857
this.trafficLogger,
858858
);
859859
if (fallbackApplied) {
860-
return "expired";
860+
return "none";
861861
}
862862

863863
const timeoutError = this.createQueueTimeoutError(next, now);

0 commit comments

Comments
 (0)