Creates a context cache for the specified model and content.
Context caching allows you to store and reuse content (e.g., PDFs, images) for faster processing. This is useful when you have large amounts of context that you want to reuse across multiple requests.
When using cached content, you cannot specify `system_instruction`, `tools`, or `tool_config` in subsequent API requests; these must be part of the cached content itself. In particular, do not call `.bind_tools()` on a model whose cached content already includes tools (see Example 3 below).
```python
create_context_cache(
    model: ChatGoogleGenerativeAI,
    messages: list[BaseMessage],
    *,
    ttl: str | None = None,
    expire_time: str | None = None,
    tools: list[BaseTool | type[BaseModel] | dict | Callable] | None = None,
    tool_choice: _ToolChoiceType | bool | None = None
) -> str
```

Returns the name of the created cache as a string; pass it back to the model as `cached_content`.

Example:
```python
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_google_genai import ChatGoogleGenerativeAI, create_context_cache

model = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

# Example 1: Cache with text content
cache = create_context_cache(
    model,
    messages=[
        SystemMessage(content="You are an expert researcher."),
        HumanMessage(content="Large document content here..."),
    ],
    ttl="3600s",  # 1 hour
)
```
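Once created, a cache can be inspected, extended, or deleted through the underlying `google-genai` client that backs the model (the same `model.client` used for file uploads in Example 2 below). A minimal sketch, assuming the SDK's `caches` service and its `UpdateCachedContentConfig` type behave as in current `google-genai` releases:

```python
from google.genai import types  # google-genai SDK types

# `cache` is the cache name string returned by create_context_cache above.
info = model.client.caches.get(name=cache)  # inspect cache metadata
print(info.expire_time)

# Extend the lifetime by another hour (assumption: UpdateCachedContentConfig
# accepts the same "3600s"-style ttl strings as create_context_cache).
model.client.caches.update(
    name=cache,
    config=types.UpdateCachedContentConfig(ttl="3600s"),
)

# Delete the cache when it is no longer needed.
model.client.caches.delete(name=cache)
```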
```python
# Example 2: Cache with uploaded files (Gemini API)
# Note: gs:// URIs are NOT supported with the Gemini API.
# Files must be uploaded first using client.files.upload().
file = model.client.files.upload(file="document.pdf")

cache = create_context_cache(
    model,
    messages=[
        SystemMessage(content="You are an expert researcher."),
        HumanMessage(
            content=[
                {
                    "type": "media",
                    "file_uri": file.uri,  # Use the uploaded file's URI
                    "mime_type": "application/pdf",
                }
            ]
        ),
    ],
    ttl="3600s",
)

# Use the cache in subsequent requests
response = model.invoke(
    "Summarize the document.",
    cached_content=cache,
)
```
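Large uploads may not be usable immediately, since the Files API processes them asynchronously. A minimal sketch of waiting for the upload in Example 2 to finish before caching, assuming the `google-genai` SDK's `files.get` call and the `PROCESSING`/`ACTIVE`/`FAILED` states of the public Files API:

```python
import time

file = model.client.files.upload(file="document.pdf")

# Poll until the Files API has finished processing the upload
# (assumption: `state` is an enum whose member names match the
# PROCESSING/ACTIVE/FAILED states of the public Files API).
while file.state.name == "PROCESSING":
    time.sleep(2)
    file = model.client.files.get(name=file.name)

if file.state.name == "FAILED":
    raise RuntimeError(f"File upload failed: {file.name}")
# file.uri can now be cached as in Example 2.
```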
```python
# Example 3: Cache with tools (correct usage)
from langchain_core.tools import tool

@tool
def search_database(query: str) -> str:
    """Search the database."""
    return f"Results for: {query}"

# Create the cache WITH the tools
cache_with_tools = create_context_cache(
    model,
    messages=[
        SystemMessage(content="You are a helpful assistant."),
        HumanMessage(content="Large context here..."),
    ],
    tools=[search_database],
    ttl="3600s",
)

# When using the cache, do NOT bind the tools again;
# they are already part of the cached content.
model_with_cache = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    cached_content=cache_with_tools,
)
# DON'T do this: .bind_tools([search_database])
response = model_with_cache.invoke("Search for X")
```

Parameters:

| Name | Type | Description |
|---|---|---|
| `model`* | `ChatGoogleGenerativeAI` | The model to create the cache for. Must be a model that supports context caching. |
| `messages`* | `list[BaseMessage]` | List of messages to cache. Can include system messages, human messages, and multimodal content (images, PDFs, etc.). |
| `ttl` | `str \| None` | Default: `None`. Time-to-live for the cache in seconds (e.g., `"3600s"`). At most one of `ttl` or `expire_time` may be specified. |
| `expire_time` | `str \| None` | Default: `None`. Absolute expiration time (ISO 8601 format). At most one of `ttl` or `expire_time` may be specified. |
| `tools` | `list[BaseTool \| type[BaseModel] \| dict \| Callable] \| None` | Default: `None`. Optional list of tools to bind to the cached context: `BaseTool` instances, Pydantic model classes, dicts, or callables. |
| `tool_choice` | `_ToolChoiceType \| bool \| None` | Default: `None`. Optional tool choice configuration. |
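The examples above all use the relative `ttl`; `expire_time` is the mutually exclusive absolute form. A minimal sketch, assuming an ISO 8601 timestamp with a UTC offset is accepted as the table describes (`tool_choice` would be passed alongside `tools` in the same call):

```python
from datetime import datetime, timedelta, timezone

# Absolute expiry one hour from now, serialized as ISO 8601.
expires = (datetime.now(timezone.utc) + timedelta(hours=1)).isoformat()

cache = create_context_cache(
    model,
    messages=[
        SystemMessage(content="You are an expert researcher."),
        HumanMessage(content="Large document content here..."),
    ],
    expire_time=expires,  # instead of ttl; at most one of the two may be set
)
```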