Quickstart
Installation
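The SDK is a Python package. Assuming it is published on PyPI under the same name used in the imports below (an assumption; check lunette.dev for the exact package name):

pip install lunette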
Configuration
Get your API key from lunette.dev, then set it:
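For example, in your shell (LUNETTE_API_KEY is an assumed variable name; use the one shown on your dashboard if it differs):

export LUNETTE_API_KEY="your-api-key"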
Option 1: Inspect AI
If you have an Inspect AI task, just add --sandbox lunette:
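For example (the task file and model here are placeholders):

inspect eval my_task.py --model anthropic/claude-haiku-4-5 --sandbox lunette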
That's it. Your trajectories are captured with full environment access.
Option 2: SDK
Step 1: Trace LLM Calls
The simplest integration is to wrap your LLM calls to capture trajectories:
import asyncio
from anthropic import AsyncAnthropic
from lunette import LunetteTracer

async def main():
    client = AsyncAnthropic()
    tracer = LunetteTracer(task="my-eval", model="claude-haiku-4-5")

    async with tracer.trajectory(sample="question-1"):
        response = await client.messages.create(
            model="claude-haiku-4-5",
            max_tokens=256,
            messages=[{"role": "user", "content": "What is 2 + 2?"}],
        )
        print(response.content[0].text)

    result = await tracer.close()
    print(f"Uploaded: {result['run_id']}")

asyncio.run(main())
All LLM calls inside trajectory() are captured automatically via OpenTelemetry. The same tracing also works for OpenAI; see Tracing.
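As a minimal sketch of the OpenAI case, assuming the tracer hooks the OpenAI client the same way it hooks Anthropic's (the model name here is illustrative):

import asyncio
from openai import AsyncOpenAI
from lunette import LunetteTracer

async def main():
    client = AsyncOpenAI()
    tracer = LunetteTracer(task="my-eval", model="gpt-4o-mini")

    # Same pattern as above: any LLM call inside trajectory() is traced.
    async with tracer.trajectory(sample="question-1"):
        response = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "What is 2 + 2?"}],
        )
        print(response.choices[0].message.content)

    result = await tracer.close()
    print(f"Uploaded: {result['run_id']}")

asyncio.run(main())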
Step 2: Add a Sandbox
If your agent executes code, add a sandbox for deeper investigation capabilities:
import asyncio
from anthropic import AsyncAnthropic
from lunette import LunetteClient, LunetteTracer

TOOLS = [{
    "name": "bash",
    "description": "Execute a bash command",
    "input_schema": {
        "type": "object",
        "properties": {"command": {"type": "string"}},
        "required": ["command"]
    }
}]

async def run_agent(sandbox, task: str) -> str:
    client = AsyncAnthropic()
    messages = [{"role": "user", "content": task}]

    # Agent loop: call the model, run any requested bash commands in the
    # sandbox, feed the results back, and repeat until the model is done.
    while True:
        response = await client.messages.create(
            model="claude-haiku-4-5",
            max_tokens=1024,
            system="You are a coding assistant. Run Python with: python3 -c 'code'",
            tools=TOOLS,
            messages=messages,
        )

        # No more tool calls: return the model's final text answer.
        if response.stop_reason == "end_turn":
            return next((b.text for b in response.content if b.type == "text"), "")

        messages.append({"role": "assistant", "content": response.content})

        tool_results = []
        for block in response.content:
            if block.type == "tool_use":
                result = await sandbox.aexec(block.input["command"])
                output = result.stdout if result.success else f"Error: {result.stderr}"
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": output
                })
        messages.append({"role": "user", "content": tool_results})

async def main():
    tracer = LunetteTracer(task="math-eval", model="claude-haiku-4-5")

    async with LunetteClient() as client:
        sandbox = await client.create_sandbox({"image": "python:3.11-slim"})

        async with tracer.trajectory(sample="problem-1"):
            answer = await run_agent(sandbox, "What is 2^100? Compute it with Python.")
            print(f"Answer: {answer}")

        await sandbox.destroy()

    result = await tracer.close()
    print(f"Uploaded: {result['run_id']}")

asyncio.run(main())
With a sandbox, investigators can re-run commands, inspect files, and reproduce errors in the original environment.
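The hosted investigation tooling drives this, but you can also poke at the environment yourself. A sketch, reusing the sandbox handle from the example above before destroy() is called:

# Ad-hoc check in the agent's own environment, via the same aexec API.
result = await sandbox.aexec("python3 -c 'print(2**100)'")
print(result.stdout if result.success else f"Error: {result.stderr}")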
You can now view your trajectories at lunette.dev, then start an investigation to understand your agent's behavior.