Files
a2a/a2a_pack/sandbox.py

175 lines
5.4 KiB
Python

"""Code-execution sandbox surface available to agents via ``ctx.sandbox``.
The abstract :class:`SandboxClient` is what agent code programs against. The
runtime layer (host-side microsandbox + FUSE-mounted MinIO, in-cluster
DaemonSet, hosted SaaS) supplies a concrete implementation.
The sandbox is **general-purpose code execution**, not Python-only. Agents
can:
* run arbitrary shell pipelines: ``await ctx.sandbox.run_shell("git clone … && cargo build")``
* exec a binary with explicit args (no shell parsing): ``await sb.exec("/usr/bin/git", ["clone", url])``
* pick any OCI image: ``run_shell("npx @openai/codex …", image="node:20-slim")``
``run_python`` is just a convenience for the common Python-snippet case.
Why an abstract class here when ``microsandbox`` itself already has a Python SDK?
The platform owns the *policy* layer — bucket selection, network egress,
write-path restrictions, resource caps, audit logging. Agents must depend on
the policy-respecting surface, not on the raw SDK, so the same agent code
runs unchanged across local dev / cluster / hosted environments.
"""
from __future__ import annotations

import logging
import uuid
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Awaitable, Callable, Sequence
@dataclass(frozen=True)
class ExecResult:
    """Result of a command run inside a sandbox.

    Instances are immutable. Only ``stdout`` is required; the remaining
    fields default to an empty, successful, untruncated result.
    """

    # Captured standard output of the command.
    stdout: str
    # Captured standard error; empty when nothing was captured.
    stderr: str = ""
    # Process exit status; ``0`` is treated as success by :attr:`ok`.
    exit_code: int = 0
    # NOTE(review): presumably set by the runtime when the captured output
    # was cut short -- confirm against the concrete implementations.
    truncated: bool = False

    @property
    def output(self) -> str:
        """Convenience: ``stdout`` immediately followed by ``stderr``.

        No separator is inserted, and a falsy ``stderr`` (``""`` or ``None``)
        contributes nothing.
        """
        return self.stdout + (self.stderr or "")

    @property
    def ok(self) -> bool:
        """Whether the command exited with status ``0``."""
        return self.exit_code == 0
@dataclass(frozen=True)
class SandboxSpec:
    """Caller request shape for :meth:`SandboxClient.create`.

    The spec is immutable and hashable. ``labels`` takes part in equality but
    is left out of the generated hash: a ``dict`` is unhashable, so including
    it would make ``hash(spec)`` raise ``TypeError`` for every instance.
    """

    # Name identifying the sandbox.
    name: str
    # OCI image the sandbox is started from.
    image: str = "python:3.11-slim"
    # Memory request; the field name indicates MiB.
    memory_mib: int = 512
    # Number of CPUs requested.
    cpus: int = 1
    # If set, the runtime mounts this workspace at ``/workspace`` inside the
    # VM (FUSE-backed where supported, snapshot bridge otherwise).
    workspace: str | None = None
    # Logical names the runtime should resolve to actual secrets and inject
    # into the VM env. Values never appear in the CLI/API surface.
    secrets: tuple[str, ...] = ()
    # Egress allowlist by hostname; empty = deny all.
    egress: tuple[str, ...] = ()
    # Free-form labels for audit / billing. ``hash=False`` keeps the spec
    # hashable; equal specs still hash equally because the hash is computed
    # from a subset of the compared fields.
    labels: dict[str, str] = field(default_factory=dict, hash=False)
class SandboxHandle(ABC):
    """Live handle to a running sandbox VM.

    Every operation is an abstract coroutine; a concrete runtime must
    implement all of them before the handle can be instantiated.
    """

    # Annotation only: no value is assigned here, so implementations are
    # expected to set ``name`` themselves.
    name: str

    @abstractmethod
    async def exec(
        self,
        cmd: str,
        args: Sequence[str] | None = None,
        *,
        timeout: float | None = None,
    ) -> ExecResult:
        """Run the binary ``cmd`` with explicit ``args`` (no shell parsing).

        NOTE(review): ``timeout`` is presumably in seconds, with ``None``
        meaning no limit -- confirm against the concrete runtimes.
        """

    @abstractmethod
    async def shell(
        self, script: str, *, timeout: float | None = None
    ) -> ExecResult:
        """Run ``script`` through a shell and return its result.

        NOTE(review): same ``timeout`` assumption as :meth:`exec`.
        """

    @abstractmethod
    async def stop(self) -> None:
        """Stop the sandbox.

        NOTE(review): presumably the graceful counterpart of :meth:`kill`;
        the distinction is defined by the runtime, not by this interface.
        """

    @abstractmethod
    async def kill(self) -> None:
        """Kill the sandbox (see the note on :meth:`stop`)."""

    @abstractmethod
    async def logs(self, *, tail: int | None = None) -> str:
        """Return the sandbox logs as a single string.

        NOTE(review): ``tail`` presumably limits the result to the last
        ``tail`` lines, ``None`` meaning everything -- confirm.
        """
class SandboxClient(ABC):
    """Negotiation surface handed to agents via ``ctx.sandbox``.

    Runtimes implement the four abstract lifecycle coroutines (``create``,
    ``get``, ``list``, ``remove``). ``run_python`` and ``run_shell`` are
    concrete conveniences built on top of them.
    """

    @abstractmethod
    async def create(self, spec: SandboxSpec) -> SandboxHandle:
        """Create a sandbox from ``spec`` and return a handle to it."""

    @abstractmethod
    async def get(self, name: str) -> SandboxHandle:
        """Return a handle for the sandbox called ``name``."""

    # The method name shadows the ``list`` builtin inside this class body. It
    # is part of the public interface and cannot be renamed; annotations stay
    # safe because ``from __future__ import annotations`` keeps them lazy.
    @abstractmethod
    async def list(self) -> list[str]:
        """Return a list of strings (presumably sandbox names -- confirm)."""

    @abstractmethod
    async def remove(self, name: str) -> None:
        """Remove the sandbox called ``name``."""

    async def run_python(
        self, code: str, *, image: str = "python:3.11-slim", **kwargs: Any
    ) -> ExecResult:
        """Convenience: spin a one-shot sandbox, run inline Python, tear down.

        Equivalent to ``create(SandboxSpec(image=image)).exec("python", ["-c", code])``
        followed by a best-effort ``stop()`` and ``remove()``. Use the
        lower-level surface when you need persistence, multiple commands, or
        non-Python tools.

        Args:
            code: Python source passed to ``python -c``.
            image: OCI image to start the sandbox from.
            **kwargs: Extra :class:`SandboxSpec` fields. ``name`` is generated
                here, so passing it raises ``TypeError``.

        Returns:
            The :class:`ExecResult` of the ``python -c`` invocation.

        Raises:
            Exception: whatever ``create`` or ``exec`` raises. Failures
                during teardown are logged, never raised.
        """
        spec = SandboxSpec(
            name=f"py-{uuid.uuid4().hex[:8]}", image=image, **kwargs
        )
        return await self._run_one_shot(
            spec, lambda sandbox: sandbox.exec("python", ["-c", code])
        )

    async def run_shell(
        self,
        script: str,
        *,
        image: str = "python:3.11-slim",
        **kwargs: Any,
    ) -> ExecResult:
        """Convenience: spin a one-shot sandbox, run an arbitrary shell script,
        tear down.

        Pass ``image=`` to pick the toolchain (e.g. ``"node:20-slim"`` for
        npm-based tools like codex, ``"rust:1-slim"`` for cargo,
        ``"alpine/git"`` for plain git ops).

        NOTE(review): this docstring used to state that the default
        ``python:3.11-slim`` image already has bash/coreutils/curl/git. The
        stock slim image does not appear to ship curl or git -- confirm
        against the image the runtime actually resolves, or pass ``image=``.

        Args:
            script: Shell script handed to :meth:`SandboxHandle.shell`.
            image: OCI image to start the sandbox from.
            **kwargs: Extra :class:`SandboxSpec` fields. ``name`` is generated
                here, so passing it raises ``TypeError``.

        Returns:
            The :class:`ExecResult` of the script.

        Raises:
            Exception: whatever ``create`` or ``shell`` raises. Failures
                during teardown are logged, never raised.
        """
        spec = SandboxSpec(
            name=f"sh-{uuid.uuid4().hex[:8]}", image=image, **kwargs
        )
        return await self._run_one_shot(
            spec, lambda sandbox: sandbox.shell(script)
        )

    async def _run_one_shot(
        self,
        spec: SandboxSpec,
        action: Callable[[SandboxHandle], Awaitable[ExecResult]],
    ) -> ExecResult:
        """Create a sandbox from ``spec``, await ``action`` on it, tear it down."""
        sandbox = await self.create(spec)
        try:
            return await action(sandbox)
        finally:
            # Runs on success and on failure alike; never masks the outcome
            # of ``action`` because teardown errors are only logged.
            await self._teardown(sandbox, spec.name)

    async def _teardown(self, sandbox: SandboxHandle, name: str) -> None:
        """Best-effort stop + remove; each failure is logged and swallowed."""
        log = logging.getLogger(__name__)
        try:
            await sandbox.stop()
        except Exception:  # noqa: BLE001 - cleanup must stay best-effort
            log.warning(
                "one-shot sandbox %s: stop() failed during cleanup",
                name,
                exc_info=True,
            )
        # Attempt removal even when stop() failed, so a sandbox that refuses
        # to stop is still deregistered where possible.
        try:
            await self.remove(name)
        except Exception:  # noqa: BLE001 - cleanup must stay best-effort
            log.warning(
                "one-shot sandbox %s: remove() failed during cleanup",
                name,
                exc_info=True,
            )
class SandboxUnavailable(RuntimeError):
    """Raised when ``ctx.sandbox`` is accessed but no runtime is attached."""