diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java
index 2d36014a719..0ada0668e2f 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java
@@ -64,11 +64,11 @@
 import org.springframework.ai.content.Media;
 import org.springframework.ai.model.ModelOptionsUtils;
 import org.springframework.ai.model.tool.DefaultToolExecutionEligibilityPredicate;
-import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
 import org.springframework.ai.model.tool.ToolCallingChatOptions;
 import org.springframework.ai.model.tool.ToolCallingManager;
 import org.springframework.ai.model.tool.ToolExecutionEligibilityPredicate;
 import org.springframework.ai.model.tool.ToolExecutionResult;
+import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
 import org.springframework.ai.retry.RetryUtils;
 import org.springframework.ai.support.UsageCalculator;
 import org.springframework.ai.tool.definition.ToolDefinition;
@@ -482,12 +482,25 @@ private Map<String, String> mergeHttpHeaders(Map<String, String> runtimeHttpHead
 
 	ChatCompletionRequest createRequest(Prompt prompt, boolean stream) {
 
+		// Get cache control from options
+		AnthropicChatOptions requestOptions = (AnthropicChatOptions) prompt.getOptions();
+		AnthropicApi.ChatCompletionRequest.CacheControl cacheControl = (requestOptions != null)
+				? requestOptions.getCacheControl() : null;
+
 		List<AnthropicMessage> userMessages = prompt.getInstructions()
 			.stream()
 			.filter(message -> message.getMessageType() != MessageType.SYSTEM)
 			.map(message -> {
 				if (message.getMessageType() == MessageType.USER) {
-					List<ContentBlock> contents = new ArrayList<>(List.of(new ContentBlock(message.getText())));
+					List<ContentBlock> contents = new ArrayList<>();
+
+					// Apply cache control if enabled for user messages
+					if (cacheControl != null) {
+						contents.add(new ContentBlock(message.getText(), cacheControl));
+					}
+					else {
+						contents.add(new ContentBlock(message.getText()));
+					}
 					if (message instanceof UserMessage userMessage) {
 						if (!CollectionUtils.isEmpty(userMessage.getMedia())) {
 							List<ContentBlock> mediaContent = userMessage.getMedia().stream().map(media -> {
@@ -537,7 +550,6 @@ else if (message.getMessageType() == MessageType.TOOL) {
 		ChatCompletionRequest request = new ChatCompletionRequest(this.defaultOptions.getModel(), userMessages,
 				systemPrompt, this.defaultOptions.getMaxTokens(), this.defaultOptions.getTemperature(), stream);
 
-		AnthropicChatOptions requestOptions = (AnthropicChatOptions) prompt.getOptions();
 		request = ModelOptionsUtils.merge(requestOptions, request, ChatCompletionRequest.class);
 
 		// Add the tool definitions to the request's tools parameter.
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java
index dbfbee561c8..536063ed546 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java
@@ -44,6 +44,7 @@
  * @author Thomas Vitale
  * @author Alexandros Pappas
  * @author Ilayaperumal Gopinathan
+ * @author Soby Chacko
  * @since 1.0.0
  */
 @JsonInclude(Include.NON_NULL)
@@ -59,6 +60,20 @@ public class AnthropicChatOptions implements ToolCallingChatOptions {
 	private @JsonProperty("top_k") Integer topK;
 	private @JsonProperty("thinking") ChatCompletionRequest.ThinkingConfig thinking;
 
+	/**
+	 * Cache control for user messages. When set, it is attached to the text content
+	 * block of each user message. Uses the {@link ChatCompletionRequest.CacheControl} record.
+	 */
+	private @JsonProperty("cache_control") ChatCompletionRequest.CacheControl cacheControl;
+
+	public ChatCompletionRequest.CacheControl getCacheControl() {
+		return this.cacheControl;
+	}
+
+	public void setCacheControl(ChatCompletionRequest.CacheControl cacheControl) {
+		this.cacheControl = cacheControl;
+	}
+
 	/**
 	 * Collection of {@link ToolCallback}s to be used for tool calling in the chat
 	 * completion requests.
@@ -111,6 +126,7 @@ public static AnthropicChatOptions fromOptions(AnthropicChatOptions fromOptions)
 			.internalToolExecutionEnabled(fromOptions.getInternalToolExecutionEnabled())
 			.toolContext(fromOptions.getToolContext() != null ? new HashMap<>(fromOptions.getToolContext()) : null)
 			.httpHeaders(fromOptions.getHttpHeaders() != null ? new HashMap<>(fromOptions.getHttpHeaders()) : null)
+			.cacheControl(fromOptions.getCacheControl())
 			.build();
 	}
 
@@ -267,12 +283,10 @@ public AnthropicChatOptions copy() {
 
 	@Override
 	public boolean equals(Object o) {
-		if (this == o) {
+		if (this == o)
 			return true;
-		}
-		if (!(o instanceof AnthropicChatOptions that)) {
+		if (!(o instanceof AnthropicChatOptions that))
 			return false;
-		}
 		return Objects.equals(this.model, that.model) && Objects.equals(this.maxTokens, that.maxTokens)
 				&& Objects.equals(this.metadata, that.metadata)
 				&& Objects.equals(this.stopSequences, that.stopSequences)
@@ -282,14 +296,15 @@ public boolean equals(Object o) {
 				&& Objects.equals(this.toolNames, that.toolNames)
 				&& Objects.equals(this.internalToolExecutionEnabled, that.internalToolExecutionEnabled)
 				&& Objects.equals(this.toolContext, that.toolContext)
-				&& Objects.equals(this.httpHeaders, that.httpHeaders);
+				&& Objects.equals(this.httpHeaders, that.httpHeaders)
+				&& Objects.equals(this.cacheControl, that.cacheControl);
 	}
 
 	@Override
 	public int hashCode() {
 		return Objects.hash(this.model, this.maxTokens, this.metadata, this.stopSequences, this.temperature, this.topP,
 				this.topK, this.thinking, this.toolCallbacks, this.toolNames, this.internalToolExecutionEnabled,
-				this.toolContext, this.httpHeaders);
+				this.toolContext, this.httpHeaders, this.cacheControl);
 	}
 
 	public static class Builder {
@@ -389,6 +404,14 @@ public Builder httpHeaders(Map<String, String> httpHeaders) {
 			return this;
 		}
 
+		/**
+		 * Set the cache control to apply to user messages.
+		 */
+		public Builder cacheControl(ChatCompletionRequest.CacheControl cacheControl) {
+			this.options.cacheControl = cacheControl;
+			return this;
+		}
+
 		public AnthropicChatOptions build() {
 			return this.options;
 		}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java
index cf410690216..c2bf307ea4f 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java
@@ -26,6 +26,7 @@
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl;
 import org.springframework.ai.anthropic.api.StreamHelper.ChatCompletionResponseBuilder;
 import org.springframework.ai.model.ApiKey;
 import org.springframework.ai.model.ChatModelDescription;
@@ -66,6 +67,7 @@
  * @author Jonghoon Park
  * @author Claudio Silva Junior
  * @author Filip Hrisafov
+ * @author Soby Chacko
  * @since 1.0.0
  */
 public final class AnthropicApi {
@@ -559,6 +561,14 @@ public record Metadata(@JsonProperty("user_id") String userId) {
 
 		}
 
+		/**
+		 * @param type the cache type supported by Anthropic, e.g. {@code "ephemeral"}. <a href=
+		 * "https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#cache-limitations">Doc</a>
+		 */
+		@JsonInclude(Include.NON_NULL)
+		public record CacheControl(String type) {
+		}
+
 		/**
 		 * Configuration for the model's thinking mode.
 		 *
@@ -765,8 +775,11 @@ public record ContentBlock(
 		@JsonProperty("thinking") String thinking,
 
 		// Redacted Thinking only
-		@JsonProperty("data") String data
-		) {
+		@JsonProperty("data") String data,
+
+		// Prompt caching only: cache control marker for this block
+		@JsonProperty("cache_control") CacheControl cacheControl
+	) {
 		// @formatter:on
 
 		/**
@@ -784,7 +797,7 @@ public ContentBlock(String mediaType, String data) {
 		 * @param source The source of the content.
 		 */
 		public ContentBlock(Type type, Source source) {
-			this(type, source, null, null, null, null, null, null, null, null, null, null);
+			this(type, source, null, null, null, null, null, null, null, null, null, null, null);
 		}
 
 		/**
@@ -792,7 +805,7 @@ public ContentBlock(Type type, Source source) {
 		 * @param source The source of the content.
 		 */
 		public ContentBlock(Source source) {
-			this(Type.IMAGE, source, null, null, null, null, null, null, null, null, null, null);
+			this(Type.IMAGE, source, null, null, null, null, null, null, null, null, null, null, null);
 		}
 
 		/**
@@ -800,7 +813,11 @@ public ContentBlock(Source source) {
 		 * @param text The text of the content.
 		 */
 		public ContentBlock(String text) {
-			this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null);
+			this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null, null);
+		}
+
+		public ContentBlock(String text, CacheControl cache) {
+			this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null, cache);
 		}
 
 		// Tool result
@@ -811,7 +828,7 @@ public ContentBlock(String text) {
 		 * @param content The content of the tool result.
 		 */
 		public ContentBlock(Type type, String toolUseId, String content) {
-			this(type, null, null, null, null, null, null, toolUseId, content, null, null, null);
+			this(type, null, null, null, null, null, null, toolUseId, content, null, null, null, null);
 		}
 
 		/**
@@ -822,7 +839,7 @@ public ContentBlock(Type type, String toolUseId, String content) {
 		 * @param index The index of the content block.
 		 */
 		public ContentBlock(Type type, Source source, String text, Integer index) {
-			this(type, source, text, index, null, null, null, null, null, null, null, null);
+			this(type, source, text, index, null, null, null, null, null, null, null, null, null);
 		}
 
 		// Tool use input JSON delta streaming
@@ -834,7 +851,7 @@ public ContentBlock(Type type, Source source, String text, Integer index) {
 		 * @param input The input of the tool use.
 		 */
 		public ContentBlock(Type type, String id, String name, Map<String, Object> input) {
-			this(type, null, null, null, id, name, input, null, null, null, null, null);
+			this(type, null, null, null, id, name, input, null, null, null, null, null, null);
 		}
 
 		/**
@@ -1028,7 +1045,9 @@ public record ChatCompletionResponse(
 	public record Usage(
 	// @formatter:off
 		@JsonProperty("input_tokens") Integer inputTokens,
-		@JsonProperty("output_tokens") Integer outputTokens) {
+		@JsonProperty("output_tokens") Integer outputTokens,
+		@JsonProperty("cache_creation_input_tokens") Integer cacheCreationInputTokens,
+		@JsonProperty("cache_read_input_tokens") Integer cacheReadInputTokens) {
 		// @formatter:off
 	}
 
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java
new file mode 100644
index 00000000000..a120e3f0f89
--- /dev/null
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2025-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.anthropic.api;
+
+import java.util.function.Supplier;
+
+import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl;
+
+/**
+ * Cache types supported by Anthropic's prompt caching feature.
+ *
+ * <p>
+ * Prompt caching allows reusing frequently used prompts to reduce costs and improve
+ * response times for repeated interactions.
+ *
+ * @see <a href=
+ * "https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching">Anthropic Prompt
+ * Caching</a>
+ * @author Claudio Silva Junior
+ * @author Soby Chacko
+ */
+public enum AnthropicCacheType {
+
+	/**
+	 * Ephemeral cache with 5-minute lifetime, refreshed on each use.
+	 */
+	EPHEMERAL(() -> new CacheControl("ephemeral"));
+
+	private final Supplier<CacheControl> value;
+
+	AnthropicCacheType(Supplier<CacheControl> value) {
+		this.value = value;
+	}
+
+	/**
+	 * Returns a new CacheControl instance for this cache type.
+	 * @return a CacheControl instance configured for this cache type
+	 */
+	public CacheControl cacheControl() {
+		return this.value.get();
+	}
+
+}
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java
index f636f29a158..e56e469cac1 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java
@@ -55,6 +55,8 @@
  * @author Christian Tzolov
  * @author Jihoon Kim
  * @author Alexandros Pappas
+ * @author Claudio Silva Junior
+ * @author Soby Chacko
  * @since 1.0.0
  */
 public class StreamHelper {
@@ -159,7 +161,7 @@ else if (event.type().equals(EventType.CONTENT_BLOCK_START)) {
 			}
 			else if (contentBlockStartEvent.contentBlock() instanceof ContentBlockThinking thinkingBlock) {
 				ContentBlock cb = new ContentBlock(Type.THINKING, null, null, contentBlockStartEvent.index(), null,
-						null, null, null, null, null, thinkingBlock.thinking(), null);
+						null, null, null, null, null, thinkingBlock.thinking(), null, null);
 				contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
 			}
 			else {
@@ -176,12 +178,12 @@ else if (event.type().equals(EventType.CONTENT_BLOCK_DELTA)) {
 			}
 			else if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaThinking thinking) {
 				ContentBlock cb = new ContentBlock(Type.THINKING_DELTA, null, null, contentBlockDeltaEvent.index(),
-						null, null, null, null, null, null, thinking.thinking(), null);
+						null, null, null, null, null, null, thinking.thinking(), null, null);
 				contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
 			}
 			else if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaSignature sig) {
 				ContentBlock cb = new ContentBlock(Type.SIGNATURE_DELTA, null, null, contentBlockDeltaEvent.index(),
-						null, null, null, null, null, sig.signature(), null, null);
+						null, null, null, null, null, sig.signature(), null, null, null);
 				contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
 			}
 			else {
@@ -205,7 +207,9 @@ else if (event.type().equals(EventType.MESSAGE_DELTA)) {
 
 			if (messageDeltaEvent.usage() != null) {
 				Usage totalUsage = new Usage(contentBlockReference.get().usage.inputTokens(),
-						messageDeltaEvent.usage().outputTokens());
+						messageDeltaEvent.usage().outputTokens(),
+						contentBlockReference.get().usage.cacheCreationInputTokens(),
+						contentBlockReference.get().usage.cacheReadInputTokens());
 				contentBlockReference.get().withUsage(totalUsage);
 			}
 		}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java
index 6570d5ee6a6..5243fcccab0 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java
@@ -32,6 +32,7 @@
 import reactor.core.publisher.Flux;
 
 import org.springframework.ai.anthropic.api.AnthropicApi;
+import org.springframework.ai.anthropic.api.AnthropicCacheType;
 import org.springframework.ai.anthropic.api.tool.MockWeatherService;
 import org.springframework.ai.chat.client.ChatClient;
 import org.springframework.ai.chat.messages.AssistantMessage;
@@ -491,6 +492,59 @@ void testToolUseContentBlock() {
 		}
 	}
 
+	@Test
+	void chatWithPromptCacheViaOptions() {
+		String userMessageText = "foobar It could be eitherr a contraction of the full title Quenta Silmarillion (\"Tale of the Silmarils\") or also a plain Genitive which "
+				+ "(as in Ancient Greek) signifies reference. This genitive is translated in English with \"about\" or \"of\" "
+				+ "constructions; the titles of the chapters in The Silmarillion are examples of this genitive in poetic English "
+				+ "(Of the Sindar, Of Men, Of the Darkening of Valinor etc), where \"of\" means \"about\" or \"concerning\". "
+				+ "In the same way, Silmarillion can be taken to mean \"Of/About the Silmarils\"";
+
+		// Repeat content to meet the minimum cacheable prompt length (2048+ tokens for Haiku models)
+		String largeContent = userMessageText.repeat(20);
+
+		// First request - should create cache
+		ChatResponse firstResponse = this.chatModel.call(new Prompt(List.of(new UserMessage(largeContent)),
+				AnthropicChatOptions.builder()
+					.model(AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue())
+					.cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl())
+					.maxTokens(100)
+					.temperature(0.8)
+					.build()));
+
+		// Access native Anthropic usage data
+		AnthropicApi.Usage firstUsage = (AnthropicApi.Usage) firstResponse.getMetadata().getUsage().getNativeUsage();
+
+		// Verify first request created cache
+		assertThat(firstUsage.cacheCreationInputTokens()).isGreaterThan(0);
+		assertThat(firstUsage.cacheReadInputTokens()).isEqualTo(0);
+
+		// Second request with identical content - should read from cache
+		ChatResponse secondResponse = this.chatModel.call(new Prompt(List.of(new UserMessage(largeContent)),
+				AnthropicChatOptions.builder()
+					.model(AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue())
+					.cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl())
+					.maxTokens(100)
+					.temperature(0.8)
+					.build()));
+
+		// Access native Anthropic usage data
+		AnthropicApi.Usage secondUsage = (AnthropicApi.Usage) secondResponse.getMetadata().getUsage().getNativeUsage();
+
+		// Verify second request used cache
+		assertThat(secondUsage.cacheCreationInputTokens()).isEqualTo(0);
+		assertThat(secondUsage.cacheReadInputTokens()).isGreaterThan(0);
+
+		// Both responses should be valid
+		assertThat(firstResponse.getResult().getOutput().getText()).isNotBlank();
+		assertThat(secondResponse.getResult().getOutput().getText()).isNotBlank();
+
+		logger.info("First request - Cache creation: {}, Cache read: {}", firstUsage.cacheCreationInputTokens(),
+				firstUsage.cacheReadInputTokens());
+		logger.info("Second request - Cache creation: {}, Cache read: {}", secondUsage.cacheCreationInputTokens(),
+				secondUsage.cacheReadInputTokens());
+	}
+
 	record ActorsFilmsRecord(String actor, List<String> movies) {
 
 	}
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java
index d9470070e95..72c2cbc01f2 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java
@@ -23,6 +23,8 @@
 import org.junit.jupiter.api.Test;
 
 import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.Metadata;
+import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl;
+import org.springframework.ai.anthropic.api.AnthropicCacheType;
 
 import static org.assertj.core.api.Assertions.assertThat;
 
@@ -30,6 +32,7 @@
  * Tests for {@link AnthropicChatOptions}.
  *
  * @author Alexandros Pappas
+ * @author Soby Chacko
  */
 class AnthropicChatOptionsTests {
 
@@ -471,4 +474,109 @@ void testSetterOverwriteBehavior() {
 		assertThat(options.getMaxTokens()).isEqualTo(10);
 	}
 
+	@Test
+	void testCacheControlBuilder() {
+		CacheControl cacheControl = AnthropicCacheType.EPHEMERAL.cacheControl();
+
+		AnthropicChatOptions options = AnthropicChatOptions.builder()
+			.model("test-model")
+			.cacheControl(cacheControl)
+			.build();
+
+		assertThat(options.getCacheControl()).isEqualTo(cacheControl);
+		assertThat(options.getCacheControl().type()).isEqualTo("ephemeral");
+	}
+
+	@Test
+	void testCacheControlDefaultValue() {
+		AnthropicChatOptions options = new AnthropicChatOptions();
+		assertThat(options.getCacheControl()).isNull();
+	}
+
+	@Test
+	void testCacheControlEqualsAndHashCode() {
+		CacheControl cacheControl = AnthropicCacheType.EPHEMERAL.cacheControl();
+
+		AnthropicChatOptions options1 = AnthropicChatOptions.builder()
+			.model("test-model")
+			.cacheControl(cacheControl)
+			.build();
+
+		AnthropicChatOptions options2 = AnthropicChatOptions.builder()
+			.model("test-model")
+			.cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl())
+			.build();
+
+		AnthropicChatOptions options3 = AnthropicChatOptions.builder().model("test-model").build();
+
+		assertThat(options1).isEqualTo(options2);
+		assertThat(options1.hashCode()).isEqualTo(options2.hashCode());
+
+		assertThat(options1).isNotEqualTo(options3);
+		assertThat(options1.hashCode()).isNotEqualTo(options3.hashCode());
+	}
+
+	@Test
+	void testCacheControlCopy() {
+		CacheControl originalCacheControl = AnthropicCacheType.EPHEMERAL.cacheControl();
+
+		AnthropicChatOptions original = AnthropicChatOptions.builder()
+			.model("test-model")
+			.cacheControl(originalCacheControl)
+			.build();
+
+		AnthropicChatOptions copied = original.copy();
+
+		assertThat(copied).isNotSameAs(original).isEqualTo(original);
+		assertThat(copied.getCacheControl()).isEqualTo(original.getCacheControl());
+		assertThat(copied.getCacheControl()).isEqualTo(originalCacheControl);
+	}
+
+	@Test
+	void testCacheControlWithNullValue() {
+		AnthropicChatOptions options = AnthropicChatOptions.builder().model("test-model").cacheControl(null).build();
+
+		assertThat(options.getCacheControl()).isNull();
+	}
+
+	@Test
+	void testBuilderWithAllFieldsIncludingCacheControl() {
+		CacheControl cacheControl = AnthropicCacheType.EPHEMERAL.cacheControl();
+
+		AnthropicChatOptions options = AnthropicChatOptions.builder()
+			.model("test-model")
+			.maxTokens(100)
+			.stopSequences(List.of("stop1", "stop2"))
+			.temperature(0.7)
+			.topP(0.8)
+			.topK(50)
+			.metadata(new Metadata("userId_123"))
+			.cacheControl(cacheControl)
+			.build();
+
+		assertThat(options)
+			.extracting("model", "maxTokens", "stopSequences", "temperature", "topP", "topK", "metadata",
+					"cacheControl")
+			.containsExactly("test-model", 100, List.of("stop1", "stop2"), 0.7, 0.8, 50, new Metadata("userId_123"),
+					cacheControl);
+	}
+
+	@Test
+	void testCacheControlMutationDoesNotAffectOriginal() {
+		CacheControl originalCacheControl = AnthropicCacheType.EPHEMERAL.cacheControl();
+
+		AnthropicChatOptions original = AnthropicChatOptions.builder()
+			.model("original-model")
+			.cacheControl(originalCacheControl)
+			.build();
+
+		AnthropicChatOptions copy = original.copy();
+		copy.setCacheControl(null);
+
+		// Original should remain unchanged
+		assertThat(original.getCacheControl()).isEqualTo(originalCacheControl);
+		// Copy should have null cache control
+		assertThat(copy.getCacheControl()).isNull();
+	}
+
 }
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java
index c78386fb7ce..6e387996b23 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java
@@ -44,6 +44,8 @@
  * @author Christian Tzolov
  * @author Jihoon Kim
  * @author Alexandros Pappas
+ * @author Claudio Silva Junior
+ * @author Soby Chacko
  */
 @EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
 public class AnthropicApiIT {
@@ -70,6 +72,37 @@ public class AnthropicApiIT {
 					}
 					""")));
 
+	@Test
+	void chatWithPromptCache() {
+		String userMessageText = "It could be either a contraction of the full title Quenta Silmarillion (\"Tale of the Silmarils\") or also a plain Genitive which "
+				+ "(as in Ancient Greek) signifies reference. This genitive is translated in English with \"about\" or \"of\" "
+				+ "constructions; the titles of the chapters in The Silmarillion are examples of this genitive in poetic English "
+				+ "(Of the Sindar, Of Men, Of the Darkening of Valinor etc), where \"of\" means \"about\" or \"concerning\". "
+				+ "In the same way, Silmarillion can be taken to mean \"Of/About the Silmarils\"";
+
+		AnthropicMessage chatCompletionMessage = new AnthropicMessage(
+				List.of(new ContentBlock(userMessageText.repeat(20), AnthropicCacheType.EPHEMERAL.cacheControl())),
+				Role.USER);
+
+		ChatCompletionRequest chatCompletionRequest = new ChatCompletionRequest(
+				AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue(), List.of(chatCompletionMessage), null, 100, 0.8,
+				false);
+
+		// First request - creates cache
+		AnthropicApi.Usage createdCacheToken = anthropicApi.chatCompletionEntity(chatCompletionRequest)
+			.getBody()
+			.usage();
+
+		assertThat(createdCacheToken.cacheCreationInputTokens()).isGreaterThan(0);
+		assertThat(createdCacheToken.cacheReadInputTokens()).isEqualTo(0);
+
+		// Second request - reads from cache (same request)
+		AnthropicApi.Usage readCacheToken = anthropicApi.chatCompletionEntity(chatCompletionRequest).getBody().usage();
+
+		assertThat(readCacheToken.cacheCreationInputTokens()).isEqualTo(0);
+		assertThat(readCacheToken.cacheReadInputTokens()).isGreaterThan(0);
+	}
+
 	@Test
 	void chatCompletionEntity() {
 
diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc
index 2094ab4ee17..f8d08b31e8a 100644
--- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc
+++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc
@@ -191,6 +191,181 @@ ChatResponse response = chatModel.call(
 
 TIP: In addition to the model specific https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java[AnthropicChatOptions] you can use a portable link:https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/chat/prompt/ChatOptions.java[ChatOptions] instance, created with the link:https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/chat/prompt/DefaultChatOptionsBuilder.java[ChatOptions#builder()].
 
+== Prompt Caching
+
+Anthropic's prompt caching feature allows you to cache frequently used prompts to reduce costs and improve response times for repeated interactions.
+When you cache a prompt, subsequent identical requests can reuse the cached content, significantly reducing the number of input tokens processed.
+
+[NOTE]
+====
+*Supported Models*
+
+Prompt caching is currently supported on Claude Opus 4, Claude Sonnet 4, Claude Sonnet 3.7, Claude Sonnet 3.5, Claude Haiku 3.5, Claude Haiku 3, and Claude Opus 3.
+====
+
+=== Cache Types
+
+Spring AI supports Anthropic's cache types through the `AnthropicCacheType` enum:
+
+* `EPHEMERAL`: Short-lived cache with a 5-minute lifetime that is refreshed each time the cached content is used
+
+=== Enabling Prompt Caching
+
+To enable prompt caching, use the `cacheControl()` method in `AnthropicChatOptions`:
+
+==== Basic Usage
+
+[source,java]
+----
+// Enable caching with ephemeral type
+ChatResponse response = chatModel.call(
+    new Prompt(
+        List.of(new UserMessage("Large content to be cached...")),
+        AnthropicChatOptions.builder()
+            .model("claude-3-5-sonnet-latest")
+            .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl())
+            .build()
+    )
+);
+----
+
+==== Using ChatClient Fluent API
+
+[source,java]
+----
+String response = ChatClient.create(chatModel)
+    .prompt()
+    .user("Analyze this large document: " + document)
+    .options(AnthropicChatOptions.builder()
+        .model("claude-3-5-sonnet-latest")
+        .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl())
+        .build())
+    .call()
+    .content();
+----
+
+=== Usage Example
+
+Here's a complete example demonstrating prompt caching with cache token usage tracking:
+
+[source,java]
+----
+// Create content that will be reused multiple times
+String largeContent = "Large document content that meets minimum token requirements...";
+
+// First request - creates cache
+ChatResponse firstResponse = chatModel.call(
+    new Prompt(
+        List.of(new UserMessage(largeContent)),
+        AnthropicChatOptions.builder()
+            .model("claude-3-haiku-20240307")
+            .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl())
+            .maxTokens(100)
+            .build()
+    )
+);
+
+// Access cache-related token usage
+AnthropicApi.Usage firstUsage = (AnthropicApi.Usage) firstResponse.getMetadata()
+    .getUsage().getNativeUsage();
+
+System.out.println("Cache creation tokens: " + firstUsage.cacheCreationInputTokens());
+System.out.println("Cache read tokens: " + firstUsage.cacheReadInputTokens());
+
+// Second request with identical content - reads from cache
+ChatResponse secondResponse = chatModel.call(
+    new Prompt(
+        List.of(new UserMessage(largeContent)),
+        AnthropicChatOptions.builder()
+            .model("claude-3-haiku-20240307")
+            .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl())
+            .maxTokens(100)
+            .build()
+    )
+);
+
+AnthropicApi.Usage secondUsage = (AnthropicApi.Usage) secondResponse.getMetadata()
+    .getUsage().getNativeUsage();
+
+System.out.println("Cache creation tokens: " + secondUsage.cacheCreationInputTokens());
+System.out.println("Cache read tokens: " + secondUsage.cacheReadInputTokens());
+----
+
+=== Token Usage Tracking
+
+The `Usage` record provides detailed information about cache-related token consumption.
+To access Anthropic-specific cache metrics, use the `getNativeUsage()` method:
+
+[source,java]
+----
+AnthropicApi.Usage usage = (AnthropicApi.Usage) response.getMetadata()
+    .getUsage().getNativeUsage();
+----
+
+Cache-specific metrics include:
+
+* `cacheCreationInputTokens()`: Returns the number of tokens used when creating a cache entry
+* `cacheReadInputTokens()`: Returns the number of tokens read from an existing cache entry
+
+.When you first send a cached prompt
+* `cacheCreationInputTokens()` will be greater than 0
+* `cacheReadInputTokens()` will be 0
+
+.When you send the same cached prompt again
+* `cacheCreationInputTokens()` will be 0
+* `cacheReadInputTokens()` will be greater than 0
+
+=== Best Practices
+
+1. **Cache Long Prompts**: Focus on caching prompts that meet the minimum token requirements (1024+ tokens for most models, 2048+ for Haiku models).
+
+2. **Reuse Identical Content**: Caching works best with exact matches of prompt content.
+Even small changes will require a new cache entry.
+
+3. **Monitor Token Usage**: Use the enhanced usage statistics to track cache effectiveness and optimize your caching strategy.
+
+4. **Place Static Content First**: Position cached content (system instructions, context, examples) at the beginning of your prompt for optimal performance.
+
+5. **5-Minute Cache Lifetime**: Ephemeral caches expire after 5 minutes of inactivity.
+Each time cached content is accessed, the 5-minute timer resets.
+
+=== Low-level API Usage
+
+When using the low-level `AnthropicApi` directly, you can specify cache control through the `ContentBlock` constructor:
+
+[source,java]
+----
+// Create content block with cache control
+ContentBlock cachedContent = new ContentBlock(
+    "<the entire Encyclopedia Britannica>",
+    AnthropicCacheType.EPHEMERAL.cacheControl()
+);
+
+AnthropicMessage message = new AnthropicMessage(
+    List.of(cachedContent),
+    Role.USER
+);
+
+ChatCompletionRequest request = new ChatCompletionRequest(
+    AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue(),
+    List.of(message),
+    null, 100, 0.8, false
+);
+
+ResponseEntity<ChatCompletionResponse> response = anthropicApi.chatCompletionEntity(request);
+
+// Access cache-related token usage
+Usage usage = response.getBody().usage();
+System.out.println("Cache creation tokens: " + usage.cacheCreationInputTokens());
+System.out.println("Cache read tokens: " + usage.cacheReadInputTokens());
+----
+
+=== Implementation Details
+
+Cache control is configured through `AnthropicChatOptions` rather than individual messages.
+This preserves compatibility when switching between different AI providers.
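+During request creation, `AnthropicChatModel` attaches the configured cache control to the text content block of every user message in the prompt; note that Anthropic allows at most four cache breakpoints per request.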
+
 == Thinking
 
 Anthropic Claude models support a "thinking" feature that allows the model to show its reasoning process before providing a final answer. This feature enables more transparent and detailed problem-solving, particularly for complex questions that require step-by-step reasoning.