在 Agent SDK 中处理多种用户输入方式。
文本输入
最基本的输入方式——用户发送文本消息:
# Simplest case: a single user turn whose content is a plain string.
messages = [
    {
        "role": "user",
        "content": "你好,请解释这段代码",
    },
]
多模态输入
除纯文本外,Claude 还支持图片、PDF 文档等多种输入类型:
图片输入
import base64
# Read the screenshot as raw bytes and base64-encode it. b64encode returns
# bytes, so decode to str before placing it in the message payload.
with open("screenshot.png", "rb") as f:
    image_data = base64.b64encode(f.read()).decode()

# One user turn with two content blocks: the image first, then the question
# that refers to it.
messages = [{
    "role": "user",
    "content": [
        {
            "type": "image",
            "source": {
                "type": "base64",
                # PNG, matching the .png file read above.
                "media_type": "image/png",
                "data": image_data,
            },
        },
        {
            "type": "text",
            "text": "这个截图中的 UI 有什么问题?",
        },
    ],
}]
文档输入
# Read the PDF as raw bytes and base64-encode it. b64encode returns bytes,
# so decode to str before placing it in the message payload.
with open("document.pdf", "rb") as f:
    doc_data = base64.b64encode(f.read()).decode()

# One user turn with two content blocks: the document first, then the
# instruction that applies to it.
messages = [{
    "role": "user",
    "content": [
        {
            "type": "document",
            "source": {
                "type": "base64",
                # PDF, matching the .pdf file read above.
                "media_type": "application/pdf",
                "data": doc_data,
            },
        },
        {"type": "text", "text": "总结这个文档"},
    ],
}]
交互式输入
def interactive_agent():
    """Run an interactive chat loop on the terminal.

    Each iteration reads one line from stdin, appends it to the running
    conversation, sends the whole conversation via
    ``client.messages.create`` and prints the concatenated text blocks of
    the reply. The assistant reply is appended to the history so later
    turns keep their context.

    The loop ends when the user types ``quit`` (case-insensitive,
    surrounding whitespace ignored) or when stdin is closed. Blank lines
    are skipped without calling the API.

    ``client`` and ``tools`` are not defined here; they must be provided
    by the surrounding module.
    """
    messages = []
    print("Claude 代理 (输入 'quit' 退出)")
    while True:
        try:
            user_input = input("> ")
        except EOFError:
            # stdin closed (Ctrl-D or end of piped input): leave the loop
            # the same way "quit" does instead of crashing with a traceback.
            break

        # Normalize only for command/emptiness checks; the text sent to the
        # model stays exactly as the user typed it.
        command = user_input.strip()
        if command.lower() == "quit":
            break
        if not command:
            # Nothing to send; prompt again.
            continue

        messages.append({"role": "user", "content": user_input})
        response = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=2048,
            tools=tools,
            messages=messages,
        )

        # Handle tool calls here...
        # NOTE(review): tool_use blocks in response.content are not executed
        # yet. Presumably the next request must answer each of them with a
        # tool_result block -- confirm against the Messages API docs before
        # relying on this loop with tools enabled.
        assistant_text = "".join(
            block.text for block in response.content if block.type == "text"
        )
        print(assistant_text)
        messages.append({"role": "assistant", "content": response.content})