# a2a/a2a_pack/sandbox.py

"""Code-execution sandbox surface available to agents via ``ctx.sandbox``.

The abstract :class:`SandboxClient` is what agent code programs against. The
runtime layer (host-side microsandbox + FUSE-mounted MinIO, in-cluster
DaemonSet, hosted SaaS) supplies a concrete implementation.

The sandbox is **general-purpose code execution**, not Python-only. Agents
can:

  * run arbitrary shell pipelines: ``await ctx.sandbox.run_shell("git clone … && cargo build")``
  * exec a binary with explicit args (no shell parsing): ``await sb.exec("/usr/bin/git", ["clone", url])``
  * pick any OCI image: ``run_shell("npx @openai/codex …", image="node:20-slim")``

``run_python`` is just a convenience for the common Python-snippet case.

Why an abstraction here when ``microsandbox`` itself already has a Python SDK?
The platform owns the *policy* layer — bucket selection, network egress,
write-path restrictions, resource caps, audit logging. Agents must depend on
the policy-respecting surface, not on the raw SDK, so the same agent code
runs unchanged across local dev / cluster / hosted environments.
"""
from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Sequence


@dataclass(frozen=True)
class ExecResult:
    """Immutable outcome of one command executed inside a sandbox.

    Attributes:
        stdout: Text the command wrote to standard output.
        stderr: Text the command wrote to standard error (empty by default).
        exit_code: Exit status of the command; ``0`` by default.
        truncated: Flag defaulting to ``False``; presumably set by the runtime
            when captured output was cut short -- confirm with the runtime.
    """

    stdout: str
    stderr: str = ""
    exit_code: int = 0
    truncated: bool = False

    @property
    def output(self) -> str:
        """Return stdout immediately followed by stderr.

        A falsy ``stderr`` (empty string or ``None``) contributes nothing.
        """
        err_text = self.stderr if self.stderr else ""
        return self.stdout + err_text

    @property
    def ok(self) -> bool:
        """Return ``True`` exactly when the exit code equals zero."""
        succeeded = self.exit_code == 0
        return succeeded


@dataclass(frozen=True)
class SandboxSpec:
    """Immutable request describing the sandbox :meth:`SandboxClient.create` should build.

    Attributes:
        name: Name of the sandbox.
        image: OCI image to boot; defaults to ``python:3.11-slim``.
        memory_mib: Requested memory, in MiB going by the field name.
        cpus: Requested CPU count.
        workspace: When set, the runtime mounts this workspace at
            ``/workspace`` inside the VM (FUSE-backed where supported,
            snapshot bridge otherwise). ``None`` means no workspace.
        secrets: Logical secret names. The runtime resolves them to real
            secrets and injects them into the VM environment; the values
            themselves never appear in the CLI/API surface.
        egress: Hostname allowlist for outbound traffic. An empty tuple
            denies all egress.
        labels: Free-form key/value labels for audit and billing. Each
            instance gets its own fresh dict by default.
    """

    name: str
    image: str = "python:3.11-slim"
    memory_mib: int = 512
    cpus: int = 1
    workspace: str | None = None
    secrets: tuple[str, ...] = ()
    egress: tuple[str, ...] = ()
    labels: dict[str, str] = field(default_factory=dict)


class SandboxHandle(ABC):
    """Abstract handle through which a caller drives one running sandbox VM.

    Concrete runtimes subclass this and implement every method below.
    """

    # Annotation only: implementations are expected to provide the value.
    name: str

    @abstractmethod
    async def exec(
        self,
        cmd: str,
        args: Sequence[str] | None = None,
        *,
        timeout: float | None = None,
    ) -> ExecResult:
        """Execute *cmd* with explicit *args* (no shell parsing).

        ``timeout`` is optional; presumably a limit in seconds -- confirm
        with the runtime implementation.
        """

    @abstractmethod
    async def shell(
        self,
        script: str,
        *,
        timeout: float | None = None,
    ) -> ExecResult:
        """Run *script* as a shell script and return its result.

        ``timeout`` is optional; presumably a limit in seconds -- confirm
        with the runtime implementation.
        """

    @abstractmethod
    async def stop(self) -> None:
        """Stop the sandbox.

        NOTE(review): presumably the graceful counterpart to :meth:`kill`;
        the exact semantics are defined by the runtime.
        """

    @abstractmethod
    async def kill(self) -> None:
        """Kill the sandbox.

        NOTE(review): presumably forceful, unlike :meth:`stop`; the exact
        semantics are defined by the runtime.
        """

    @abstractmethod
    async def logs(self, *, tail: int | None = None) -> str:
        """Return the sandbox's logs as text.

        ``tail`` presumably limits the result to the most recent entries;
        ``None`` is the default -- confirm with the runtime implementation.
        """


class SandboxClient(ABC):
    """Negotiation surface handed to agents via ``ctx.sandbox``.

    Runtimes implement the four abstract lifecycle methods (:meth:`create`,
    :meth:`get`, :meth:`list`, :meth:`remove`). The ``run_*`` conveniences
    are built purely on top of those and need no overriding.
    """

    @abstractmethod
    async def create(self, spec: SandboxSpec) -> SandboxHandle:
        """Create the sandbox described by *spec* and return a handle to it."""

    @abstractmethod
    async def get(self, name: str) -> SandboxHandle:
        """Return a handle for the sandbox called *name*."""

    @abstractmethod
    async def list(self) -> list[str]:
        """Return sandbox names (presumably those visible to this client)."""

    @abstractmethod
    async def remove(self, name: str) -> None:
        """Remove the sandbox called *name*."""

    @staticmethod
    def _new_one_shot_spec(
        prefix: str, image: str, overrides: dict[str, Any]
    ) -> SandboxSpec:
        """Build a spec named ``<prefix>-<8 random hex chars>`` for a throwaway sandbox."""
        import uuid

        return SandboxSpec(
            name=f"{prefix}-{uuid.uuid4().hex[:8]}", image=image, **overrides
        )

    async def _discard_one_shot(self, sb: SandboxHandle, name: str) -> None:
        """Best-effort stop + remove of a throwaway sandbox.

        Failures are logged rather than raised, so they can never mask the
        command's own result or exception. Both steps are always attempted,
        because a failed ``stop`` does not imply ``remove`` will fail too.
        """
        import logging

        log = logging.getLogger(__name__)
        try:
            await sb.stop()
        except Exception:  # noqa: BLE001 - teardown is deliberately best-effort
            log.warning(
                "sandbox %s: stop() failed during teardown", name, exc_info=True
            )
        try:
            await self.remove(name)
        except Exception:  # noqa: BLE001 - teardown is deliberately best-effort
            log.warning(
                "sandbox %s: remove() failed during teardown", name, exc_info=True
            )

    async def run_python(
        self, code: str, *, image: str = "python:3.11-slim", **kwargs: Any
    ) -> ExecResult:
        """Convenience: spin a one-shot sandbox, run inline Python, tear down.

        Equivalent to ``create(SandboxSpec(image=image)).exec("python", ["-c", code])``.
        Use the lower-level surface when you need persistence, multiple
        commands, or non-Python tools.

        Args:
            code: Python source passed to ``python -c``.
            image: OCI image for the sandbox.
            **kwargs: Extra :class:`SandboxSpec` fields (e.g. ``memory_mib``,
                ``egress``). ``name`` is generated and must not be passed.

        Returns:
            The :class:`ExecResult` of the ``python -c`` invocation.

        Raises:
            Whatever :meth:`create` or the handle's ``exec`` raises. Teardown
            failures are logged as warnings and never raised.
        """
        spec = self._new_one_shot_spec("py", image, kwargs)
        sb = await self.create(spec)
        try:
            return await sb.exec("python", ["-c", code])
        finally:
            await self._discard_one_shot(sb, spec.name)

    async def run_shell(
        self,
        script: str,
        *,
        image: str = "python:3.11-slim",
        **kwargs: Any,
    ) -> ExecResult:
        """Convenience: spin a one-shot sandbox, run an arbitrary shell script,
        tear down.

        Pass ``image=`` to pick the toolchain (e.g. ``"node:20-slim"`` for
        npm-based tools like codex, ``"rust:1-slim"`` for cargo,
        ``"alpine/git"`` for plain git ops).

        NOTE(review): this docstring previously stated that the default
        ``python:3.11-slim`` already ships bash/coreutils/curl/git. Nothing
        in this module guarantees that; verify against the actual image
        before relying on curl or git being present.

        Args:
            script: Shell script handed to the handle's ``shell`` method.
            image: OCI image for the sandbox.
            **kwargs: Extra :class:`SandboxSpec` fields (e.g. ``memory_mib``,
                ``egress``). ``name`` is generated and must not be passed.

        Returns:
            The :class:`ExecResult` of the script.

        Raises:
            Whatever :meth:`create` or the handle's ``shell`` raises. Teardown
            failures are logged as warnings and never raised.
        """
        spec = self._new_one_shot_spec("sh", image, kwargs)
        sb = await self.create(spec)
        try:
            return await sb.shell(script)
        finally:
            await self._discard_one_shot(sb, spec.name)


class SandboxUnavailable(RuntimeError):
    """Signals that ``ctx.sandbox`` was accessed with no sandbox runtime attached."""