Skip to content

Commit e57caa3

Browse files
committed
GH-1403: Add Anthropic prompt caching via AnthropicChatOptions
- Add cacheControl field to AnthropicChatOptions with builder method
- Create AnthropicCacheType enum with EPHEMERAL type for type-safe cache creation
- Update AnthropicChatModel.createRequest() to apply cache control from options to user message ContentBlocks
- Extend ContentBlock record with cacheControl parameter and constructor for API compatibility
- Update Usage record to include cacheCreationInputTokens and cacheReadInputTokens fields
- Update StreamHelper to handle new Usage constructor with cache token parameters
- Add AnthropicApiIT.chatWithPromptCache() test for low-level API validation
- Add AnthropicChatModelIT.chatWithPromptCacheViaOptions() integration test
- Add comprehensive unit tests for AnthropicChatOptions cache control functionality
- Update documentation with cacheControl() method examples and usage patterns

Cache control is configured through AnthropicChatOptions rather than message classes to maintain provider portability. The cache control gets applied during request creation in AnthropicChatModel when building ContentBlocks for user messages.

Original implementation provided by @Claudio-code (Claudio Silva Junior). See 15e5026.

Fixes #1403

Signed-off-by: Soby Chacko <[email protected]>
1 parent 36a97d3 commit e57caa3

File tree

9 files changed

+507
-22
lines changed

9 files changed

+507
-22
lines changed

models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,11 @@
6464
import org.springframework.ai.content.Media;
6565
import org.springframework.ai.model.ModelOptionsUtils;
6666
import org.springframework.ai.model.tool.DefaultToolExecutionEligibilityPredicate;
67-
import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
6867
import org.springframework.ai.model.tool.ToolCallingChatOptions;
6968
import org.springframework.ai.model.tool.ToolCallingManager;
7069
import org.springframework.ai.model.tool.ToolExecutionEligibilityPredicate;
7170
import org.springframework.ai.model.tool.ToolExecutionResult;
71+
import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
7272
import org.springframework.ai.retry.RetryUtils;
7373
import org.springframework.ai.support.UsageCalculator;
7474
import org.springframework.ai.tool.definition.ToolDefinition;
@@ -482,12 +482,25 @@ private Map<String, String> mergeHttpHeaders(Map<String, String> runtimeHttpHead
482482

483483
ChatCompletionRequest createRequest(Prompt prompt, boolean stream) {
484484

485+
// Get cache control from options
486+
AnthropicChatOptions requestOptions = (AnthropicChatOptions) prompt.getOptions();
487+
AnthropicApi.ChatCompletionRequest.CacheControl cacheControl = (requestOptions != null)
488+
? requestOptions.getCacheControl() : null;
489+
485490
List<AnthropicMessage> userMessages = prompt.getInstructions()
486491
.stream()
487492
.filter(message -> message.getMessageType() != MessageType.SYSTEM)
488493
.map(message -> {
489494
if (message.getMessageType() == MessageType.USER) {
490-
List<ContentBlock> contents = new ArrayList<>(List.of(new ContentBlock(message.getText())));
495+
List<ContentBlock> contents = new ArrayList<>();
496+
497+
// Apply cache control if enabled for user messages
498+
if (cacheControl != null) {
499+
contents.add(new ContentBlock(message.getText(), cacheControl));
500+
}
501+
else {
502+
contents.add(new ContentBlock(message.getText()));
503+
}
491504
if (message instanceof UserMessage userMessage) {
492505
if (!CollectionUtils.isEmpty(userMessage.getMedia())) {
493506
List<ContentBlock> mediaContent = userMessage.getMedia().stream().map(media -> {
@@ -537,7 +550,6 @@ else if (message.getMessageType() == MessageType.TOOL) {
537550
ChatCompletionRequest request = new ChatCompletionRequest(this.defaultOptions.getModel(), userMessages,
538551
systemPrompt, this.defaultOptions.getMaxTokens(), this.defaultOptions.getTemperature(), stream);
539552

540-
AnthropicChatOptions requestOptions = (AnthropicChatOptions) prompt.getOptions();
541553
request = ModelOptionsUtils.merge(requestOptions, request, ChatCompletionRequest.class);
542554

543555
// Add the tool definitions to the request's tools parameter.

models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
* @author Thomas Vitale
4545
* @author Alexandros Pappas
4646
* @author Ilayaperumal Gopinathan
47+
* @author Soby Chacko
4748
* @since 1.0.0
4849
*/
4950
@JsonInclude(Include.NON_NULL)
@@ -59,6 +60,20 @@ public class AnthropicChatOptions implements ToolCallingChatOptions {
5960
private @JsonProperty("top_k") Integer topK;
6061
private @JsonProperty("thinking") ChatCompletionRequest.ThinkingConfig thinking;
6162

63+
/**
64+
* Cache control for user messages. When set, it is attached to the text content block of every user message.
65+
* Uses the existing CacheControl record from AnthropicApi.ChatCompletionRequest.
66+
*/
67+
private @JsonProperty("cache_control") ChatCompletionRequest.CacheControl cacheControl;
68+
69+
public ChatCompletionRequest.CacheControl getCacheControl() {
70+
return cacheControl;
71+
}
72+
73+
public void setCacheControl(ChatCompletionRequest.CacheControl cacheControl) {
74+
this.cacheControl = cacheControl;
75+
}
76+
6277
/**
6378
* Collection of {@link ToolCallback}s to be used for tool calling in the chat
6479
* completion requests.
@@ -111,6 +126,7 @@ public static AnthropicChatOptions fromOptions(AnthropicChatOptions fromOptions)
111126
.internalToolExecutionEnabled(fromOptions.getInternalToolExecutionEnabled())
112127
.toolContext(fromOptions.getToolContext() != null ? new HashMap<>(fromOptions.getToolContext()) : null)
113128
.httpHeaders(fromOptions.getHttpHeaders() != null ? new HashMap<>(fromOptions.getHttpHeaders()) : null)
129+
.cacheControl(fromOptions.getCacheControl())
114130
.build();
115131
}
116132

@@ -267,12 +283,10 @@ public AnthropicChatOptions copy() {
267283

268284
@Override
269285
public boolean equals(Object o) {
270-
if (this == o) {
286+
if (this == o)
271287
return true;
272-
}
273-
if (!(o instanceof AnthropicChatOptions that)) {
288+
if (!(o instanceof AnthropicChatOptions that))
274289
return false;
275-
}
276290
return Objects.equals(this.model, that.model) && Objects.equals(this.maxTokens, that.maxTokens)
277291
&& Objects.equals(this.metadata, that.metadata)
278292
&& Objects.equals(this.stopSequences, that.stopSequences)
@@ -282,14 +296,15 @@ public boolean equals(Object o) {
282296
&& Objects.equals(this.toolNames, that.toolNames)
283297
&& Objects.equals(this.internalToolExecutionEnabled, that.internalToolExecutionEnabled)
284298
&& Objects.equals(this.toolContext, that.toolContext)
285-
&& Objects.equals(this.httpHeaders, that.httpHeaders);
299+
&& Objects.equals(this.httpHeaders, that.httpHeaders)
300+
&& Objects.equals(this.cacheControl, that.cacheControl);
286301
}
287302

288303
@Override
289304
public int hashCode() {
290305
return Objects.hash(this.model, this.maxTokens, this.metadata, this.stopSequences, this.temperature, this.topP,
291306
this.topK, this.thinking, this.toolCallbacks, this.toolNames, this.internalToolExecutionEnabled,
292-
this.toolContext, this.httpHeaders);
307+
this.toolContext, this.httpHeaders, this.cacheControl);
293308
}
294309

295310
public static class Builder {
@@ -389,6 +404,14 @@ public Builder httpHeaders(Map<String, String> httpHeaders) {
389404
return this;
390405
}
391406

407+
/**
408+
* Sets the cache control to apply to user messages; pass {@code null} to leave caching disabled.
409+
*/
410+
public Builder cacheControl(ChatCompletionRequest.CacheControl cacheControl) {
411+
this.options.cacheControl = cacheControl;
412+
return this;
413+
}
414+
392415
public AnthropicChatOptions build() {
393416
return this.options;
394417
}

models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
import org.slf4j.Logger;
2828
import org.slf4j.LoggerFactory;
29+
import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl;
2930
import org.springframework.ai.anthropic.api.StreamHelper.ChatCompletionResponseBuilder;
3031
import org.springframework.ai.model.ApiKey;
3132
import org.springframework.ai.model.ChatModelDescription;
@@ -66,6 +67,7 @@
6667
* @author Jonghoon Park
6768
* @author Claudio Silva Junior
6869
* @author Filip Hrisafov
70+
* @author Soby Chacko
6971
* @since 1.0.0
7072
*/
7173
public final class AnthropicApi {
@@ -559,6 +561,14 @@ public record Metadata(@JsonProperty("user_id") String userId) {
559561

560562
}
561563

564+
/**
565+
* @param type the cache type supported by Anthropic (for example, {@code "ephemeral"}). <a href=
566+
* "https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#cache-limitations">Doc</a>
567+
*/
568+
@JsonInclude(Include.NON_NULL)
569+
public record CacheControl(String type) {
570+
}
571+
562572
/**
563573
* Configuration for the model's thinking mode.
564574
*
@@ -765,8 +775,11 @@ public record ContentBlock(
765775
@JsonProperty("thinking") String thinking,
766776

767777
// Redacted Thinking only
768-
@JsonProperty("data") String data
769-
) {
778+
@JsonProperty("data") String data,
779+
780+
// Prompt caching only (optional cache control marker)
781+
@JsonProperty("cache_control") CacheControl cacheControl
782+
) {
770783
// @formatter:on
771784

772785
/**
@@ -784,23 +797,27 @@ public ContentBlock(String mediaType, String data) {
784797
* @param source The source of the content.
785798
*/
786799
public ContentBlock(Type type, Source source) {
787-
this(type, source, null, null, null, null, null, null, null, null, null, null);
800+
this(type, source, null, null, null, null, null, null, null, null, null, null, null);
788801
}
789802

790803
/**
791804
* Create content block
792805
* @param source The source of the content.
793806
*/
794807
public ContentBlock(Source source) {
795-
this(Type.IMAGE, source, null, null, null, null, null, null, null, null, null, null);
808+
this(Type.IMAGE, source, null, null, null, null, null, null, null, null, null, null, null);
796809
}
797810

798811
/**
799812
* Create content block
800813
* @param text The text of the content.
801814
*/
802815
public ContentBlock(String text) {
803-
this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null);
816+
this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null, null);
817+
}
818+
819+
public ContentBlock(String text, CacheControl cache) {
820+
this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null, cache);
804821
}
805822

806823
// Tool result
@@ -811,7 +828,7 @@ public ContentBlock(String text) {
811828
* @param content The content of the tool result.
812829
*/
813830
public ContentBlock(Type type, String toolUseId, String content) {
814-
this(type, null, null, null, null, null, null, toolUseId, content, null, null, null);
831+
this(type, null, null, null, null, null, null, toolUseId, content, null, null, null, null);
815832
}
816833

817834
/**
@@ -822,7 +839,7 @@ public ContentBlock(Type type, String toolUseId, String content) {
822839
* @param index The index of the content block.
823840
*/
824841
public ContentBlock(Type type, Source source, String text, Integer index) {
825-
this(type, source, text, index, null, null, null, null, null, null, null, null);
842+
this(type, source, text, index, null, null, null, null, null, null, null, null, null);
826843
}
827844

828845
// Tool use input JSON delta streaming
@@ -834,7 +851,7 @@ public ContentBlock(Type type, Source source, String text, Integer index) {
834851
* @param input The input of the tool use.
835852
*/
836853
public ContentBlock(Type type, String id, String name, Map<String, Object> input) {
837-
this(type, null, null, null, id, name, input, null, null, null, null, null);
854+
this(type, null, null, null, id, name, input, null, null, null, null, null, null);
838855
}
839856

840857
/**
@@ -1028,7 +1045,9 @@ public record ChatCompletionResponse(
10281045
public record Usage(
10291046
// @formatter:off
10301047
@JsonProperty("input_tokens") Integer inputTokens,
1031-
@JsonProperty("output_tokens") Integer outputTokens) {
1048+
@JsonProperty("output_tokens") Integer outputTokens,
1049+
@JsonProperty("cache_creation_input_tokens") Integer cacheCreationInputTokens,
1050+
@JsonProperty("cache_read_input_tokens") Integer cacheReadInputTokens) {
10321051
// @formatter:on
10331052
}
10341053

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
* Copyright 2025-2025 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.anthropic.api;
18+
19+
import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl;
20+
21+
import java.util.function.Supplier;
22+
23+
/**
24+
* Cache types supported by Anthropic's prompt caching feature.
25+
*
26+
* <p>
27+
* Prompt caching allows reusing frequently used prompts to reduce costs and improve
28+
* response times for repeated interactions.
29+
*
30+
* @see <a href=
31+
* "https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching">Anthropic Prompt
32+
* Caching</a>
33+
* @author Claudio Silva Junior
34+
* @author Soby Chacko
35+
*/
36+
public enum AnthropicCacheType {
37+
38+
/**
39+
* Ephemeral cache with 5-minute lifetime, refreshed on each use.
40+
*/
41+
EPHEMERAL(() -> new CacheControl("ephemeral"));
42+
43+
private final Supplier<CacheControl> value;
44+
45+
AnthropicCacheType(Supplier<CacheControl> value) {
46+
this.value = value;
47+
}
48+
49+
/**
50+
* Returns a new CacheControl instance for this cache type.
51+
* @return a CacheControl instance configured for this cache type
52+
*/
53+
public CacheControl cacheControl() {
54+
return value.get();
55+
}
56+
57+
}

models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@
5555
* @author Christian Tzolov
5656
* @author Jihoon Kim
5757
* @author Alexandros Pappas
58+
* @author Claudio Silva Junior
59+
* @author Soby Chacko
5860
* @since 1.0.0
5961
*/
6062
public class StreamHelper {
@@ -159,7 +161,7 @@ else if (event.type().equals(EventType.CONTENT_BLOCK_START)) {
159161
}
160162
else if (contentBlockStartEvent.contentBlock() instanceof ContentBlockThinking thinkingBlock) {
161163
ContentBlock cb = new ContentBlock(Type.THINKING, null, null, contentBlockStartEvent.index(), null,
162-
null, null, null, null, null, thinkingBlock.thinking(), null);
164+
null, null, null, null, null, thinkingBlock.thinking(), null, null);
163165
contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
164166
}
165167
else {
@@ -176,12 +178,12 @@ else if (event.type().equals(EventType.CONTENT_BLOCK_DELTA)) {
176178
}
177179
else if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaThinking thinking) {
178180
ContentBlock cb = new ContentBlock(Type.THINKING_DELTA, null, null, contentBlockDeltaEvent.index(),
179-
null, null, null, null, null, null, thinking.thinking(), null);
181+
null, null, null, null, null, null, thinking.thinking(), null, null);
180182
contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
181183
}
182184
else if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaSignature sig) {
183185
ContentBlock cb = new ContentBlock(Type.SIGNATURE_DELTA, null, null, contentBlockDeltaEvent.index(),
184-
null, null, null, null, null, sig.signature(), null, null);
186+
null, null, null, null, null, sig.signature(), null, null, null);
185187
contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
186188
}
187189
else {
@@ -205,7 +207,9 @@ else if (event.type().equals(EventType.MESSAGE_DELTA)) {
205207

206208
if (messageDeltaEvent.usage() != null) {
207209
Usage totalUsage = new Usage(contentBlockReference.get().usage.inputTokens(),
208-
messageDeltaEvent.usage().outputTokens());
210+
messageDeltaEvent.usage().outputTokens(),
211+
contentBlockReference.get().usage.cacheCreationInputTokens(),
212+
contentBlockReference.get().usage.cacheReadInputTokens());
209213
contentBlockReference.get().withUsage(totalUsage);
210214
}
211215
}

0 commit comments

Comments
 (0)