Prompt Caching(提示词缓存)
对重复使用的长提示词开启缓存后,后续命中缓存的请求,输入费用最多可降低约 90%,延迟最多可降低约 85%。
- Python
- JavaScript
- curl
# Example: put a long document in the system prompt, mark it with cache_control,
# and print the cache usage counters returned with the response.
import anthropic

client = anthropic.Anthropic(api_key="your-api-key", base_url="https://www.cheapertoken.work")

# Read the document explicitly as UTF-8 so the result does not depend on the
# platform's default encoding.
with open("long_document.txt", encoding="utf-8") as f:
    document = f.read()

response = client.messages.create(
    model="claude-opus-4-7",
    max_tokens=1024,
    system=[
        {"type": "text", "text": "你是一个文档分析助手。"},
        {
            "type": "text",
            "text": document,
            # cache_control is attached to the long, reusable block.
            "cache_control": {"type": "ephemeral"},
        },
    ],
    messages=[{"role": "user", "content": "请总结这篇文档的主要内容"}],
)

print(response.content[0].text)
# Usage counters reported by the API for this request.
print(f"缓存创建 token: {response.usage.cache_creation_input_tokens}")
print(f"缓存命中 token: {response.usage.cache_read_input_tokens}")
import Anthropic from "@anthropic-ai/sdk";
import fs from "fs";

// Example: send a long document as a system block flagged with cache_control,
// then print the reply and the cache usage counters.
const client = new Anthropic({
  apiKey: "your-api-key",
  baseURL: "https://www.cheapertoken.work",
});

const document = fs.readFileSync("long_document.txt", "utf-8");

// System prompt: a short instruction followed by the long document.
const systemBlocks = [
  { type: "text", text: "你是一个文档分析助手。" },
  { type: "text", text: document, cache_control: { type: "ephemeral" } },
];
const userTurn = { role: "user", content: "请总结这篇文档的主要内容" };

const response = await client.messages.create({
  model: "claude-opus-4-7",
  max_tokens: 1024,
  system: systemBlocks,
  messages: [userTurn],
});

const [firstBlock] = response.content;
console.log(firstBlock.text);

const { usage } = response;
console.log(`缓存创建 token: ${usage.cache_creation_input_tokens}`);
console.log(`缓存命中 token: ${usage.cache_read_input_tokens}`);
# POST a Messages request whose second system block carries cache_control.
curl --header "x-api-key: your-api-key" \
     --header "anthropic-version: 2023-06-01" \
     --header "content-type: application/json" \
     --data '{
"model": "claude-opus-4-7",
"max_tokens": 1024,
"system": [
{"type": "text", "text": "你是一个文档分析助手。"},
{"type": "text", "text": "<your long document here>", "cache_control": {"type": "ephemeral"}}
],
"messages": [{"role": "user", "content": "请总结这篇文档的主要内容"}]
}' \
     https://www.cheapertoken.work/v1/messages
多轮对话中缓存工具定义
- Python
- JavaScript
- curl
# Example: attach cache_control to a tool definition, send a request that may
# trigger the tool, and print the reply blocks plus the cache usage counters.
import anthropic

client = anthropic.Anthropic(api_key="your-api-key", base_url="https://www.cheapertoken.work")

tools = [
    {
        "name": "search",
        "description": "搜索知识库",
        "input_schema": {
            "type": "object",
            "properties": {"query": {"type": "string"}},
            "required": ["query"],
        },
        "cache_control": {"type": "ephemeral"},
    }
]

response = client.messages.create(
    model="claude-opus-4-7",
    max_tokens=1024,
    tools=tools,
    messages=[{"role": "user", "content": "搜索关于Python的内容"}],
)

# With tools supplied, the reply can contain tool_use blocks, which have no
# `.text` attribute. Dispatch on the block type instead of assuming that
# content[0] is a text block.
for block in response.content:
    if block.type == "text":
        print(block.text)
    elif block.type == "tool_use":
        print(f"工具调用: {block.name} {block.input}")

# Report cache usage, matching the document-caching example.
print(f"缓存创建 token: {response.usage.cache_creation_input_tokens}")
print(f"缓存命中 token: {response.usage.cache_read_input_tokens}")
import Anthropic from "@anthropic-ai/sdk";

// Example: attach cache_control to a tool definition, send a request that may
// trigger the tool, and print the reply blocks plus the cache usage counters.
const client = new Anthropic({ apiKey: "your-api-key", baseURL: "https://www.cheapertoken.work" });

const tools = [
  {
    name: "search",
    description: "搜索知识库",
    input_schema: {
      type: "object",
      properties: { query: { type: "string" } },
      required: ["query"],
    },
    cache_control: { type: "ephemeral" },
  },
];

const response = await client.messages.create({
  model: "claude-opus-4-7",
  max_tokens: 1024,
  tools,
  messages: [{ role: "user", content: "搜索关于Python的内容" }],
});

// With tools supplied, the reply can contain tool_use blocks, which carry no
// `text` field. Dispatch on the block type instead of assuming that
// content[0] is a text block.
for (const block of response.content) {
  if (block.type === "text") {
    console.log(block.text);
  } else if (block.type === "tool_use") {
    console.log(`工具调用: ${block.name} ${JSON.stringify(block.input)}`);
  }
}

// Report cache usage, matching the document-caching example.
console.log(`缓存创建 token: ${response.usage.cache_creation_input_tokens}`);
console.log(`缓存命中 token: ${response.usage.cache_read_input_tokens}`);
# POST a Messages request whose tool definition carries cache_control.
curl --header "x-api-key: your-api-key" \
     --header "anthropic-version: 2023-06-01" \
     --header "content-type: application/json" \
     --data '{
"model": "claude-opus-4-7",
"max_tokens": 1024,
"tools": [{
"name": "search",
"description": "搜索知识库",
"input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]},
"cache_control": {"type": "ephemeral"}
}],
"messages": [{"role": "user", "content": "搜索关于Python的内容"}]
}' \
     https://www.cheapertoken.work/v1/messages
缓存默认有效期为 5 分钟,每次命中后重新计时;如需更长时间,可在 cache_control 中设置 "ttl": "1h",将有效期延长至 1 小时。可缓存内容的最小长度为 1024 个 token(Haiku 为 2048);低于该长度的请求会正常处理,但不会创建缓存。