ship grants, a2a_client, discovery, sandbox SDK + tests

2026-05-09 12:43:07 -03:00
parent b6f6cd1643
commit 2dcb8a09cd
15 changed files with 1853 additions and 75 deletions
--- a/a2a_pack/sandbox.py
+++ b/a2a_pack/sandbox.py
@@ -0,0 +1,174 @@
+"""Code-execution sandbox surface available to agents via ``ctx.sandbox``.
+
+The abstract :class:`SandboxClient` is what agent code programs against. The
+runtime layer (host-side microsandbox + FUSE-mounted MinIO, in-cluster
+DaemonSet, hosted SaaS) supplies a concrete implementation.
+
+The sandbox is **general-purpose code execution**, not Python-only. Agents
+can:
+
+  * run arbitrary shell pipelines: ``await ctx.sandbox.run_shell("git clone … && cargo build")``
+  * exec a binary with explicit args (no shell parsing): ``await sb.exec("/usr/bin/git", ["clone", url])``
+  * pick any OCI image: ``run_shell("npx @openai/codex …", image="node:20-slim")``
+
+``run_python`` is just a convenience for the common Python-snippet case.
+
+Why an abstract interface here when ``microsandbox`` itself already has a
+Python SDK?
+The platform owns the *policy* layer — bucket selection, network egress,
+write-path restrictions, resource caps, audit logging. Agents must depend on
+the policy-respecting surface, not on the raw SDK, so the same agent code
+runs unchanged across local dev / cluster / hosted environments.
+"""
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Any, Sequence
+
+
+@dataclass(frozen=True)
+class ExecResult:
+    """Immutable outcome of one command executed inside a sandbox.
+
+    Only ``stdout`` is required; ``stderr`` defaults to the empty string,
+    ``exit_code`` to ``0`` and ``truncated`` to ``False``.
+    """
+
+    stdout: str
+    stderr: str = ""
+    exit_code: int = 0
+    truncated: bool = False
+
+    @property
+    def ok(self) -> bool:
+        """Whether the command finished with exit code ``0``."""
+        return self.exit_code == 0
+
+    @property
+    def output(self) -> str:
+        """``stdout`` immediately followed by ``stderr`` (no separator)."""
+        # A falsy ``stderr`` (e.g. ``None``) is treated as the empty string.
+        err_text = self.stderr or ""
+        return self.stdout + err_text
+
+
+@dataclass(frozen=True)
+class SandboxSpec:
+    """Caller request shape for :meth:`SandboxClient.create`.
+
+    Only ``name`` is required; every other field has a default. Instances
+    are frozen, so fields cannot be reassigned after construction (the
+    ``labels`` dict itself remains a mutable object).
+    """
+
+    # Identifier for the sandbox; the one-shot ``run_python`` / ``run_shell``
+    # helpers pass this same name to ``SandboxClient.remove`` on teardown.
+    name: str
+    # OCI image to run.
+    image: str = "python:3.11-slim"
+    # Resource sizing requested for the VM. NOTE(review): units are inferred
+    # from the field names (MiB, CPU count) — confirm against the runtime.
+    memory_mib: int = 512
+    cpus: int = 1
+    # If set, the runtime mounts this workspace at ``/workspace`` inside the
+    # VM (FUSE-backed where supported, snapshot bridge otherwise).
+    workspace: str | None = None
+    # Logical names the runtime should resolve to actual secrets and inject
+    # into the VM env. Values never appear in the CLI/API surface.
+    secrets: tuple[str, ...] = ()
+    # Egress allowlist by hostname; empty = deny all.
+    egress: tuple[str, ...] = ()
+    # Free-form labels for audit / billing.
+    labels: dict[str, str] = field(default_factory=dict)
+
+
+class SandboxHandle(ABC):
+    """Live handle to a running sandbox VM.
+
+    Purely abstract: every operation below must be supplied by a concrete
+    subclass.
+    """
+
+    name: str
+
+    @abstractmethod
+    async def exec(
+        self,
+        cmd: str,
+        args: Sequence[str] | None = None,
+        *,
+        timeout: float | None = None,
+    ) -> ExecResult:
+        """Run ``cmd`` with explicit ``args`` (no shell parsing).
+
+        NOTE(review): ``timeout`` is presumably in seconds, with ``None``
+        meaning no limit — confirm against the concrete runtime.
+        """
+
+    @abstractmethod
+    async def shell(
+        self, script: str, *, timeout: float | None = None
+    ) -> ExecResult:
+        """Run ``script`` as a shell script inside the sandbox."""
+
+    @abstractmethod
+    async def stop(self) -> None:
+        """Stop the sandbox."""
+
+    @abstractmethod
+    async def kill(self) -> None:
+        """Kill the sandbox.
+
+        NOTE(review): presumably the forceful counterpart of :meth:`stop`;
+        the exact distinction is defined by the concrete runtime.
+        """
+
+    @abstractmethod
+    async def logs(self, *, tail: int | None = None) -> str:
+        """Return the sandbox's logs as text.
+
+        NOTE(review): ``tail`` presumably limits the result to the last N
+        lines — confirm against the concrete runtime.
+        """
+
+
+class SandboxClient(ABC):
+    """Negotiation surface handed to agents via ``ctx.sandbox``.
+
+    Concrete runtimes implement the four abstract lifecycle methods;
+    :meth:`run_python` and :meth:`run_shell` are one-shot conveniences built
+    on top of them.
+    """
+
+    @abstractmethod
+    async def create(self, spec: SandboxSpec) -> SandboxHandle: ...
+
+    @abstractmethod
+    async def get(self, name: str) -> SandboxHandle: ...
+
+    @abstractmethod
+    async def list(self) -> list[str]: ...
+
+    @abstractmethod
+    async def remove(self, name: str) -> None: ...
+
+    @staticmethod
+    def _one_shot_spec(
+        prefix: str, image: str, overrides: dict[str, Any]
+    ) -> SandboxSpec:
+        """Build a spec named ``<prefix>-<8 random hex chars>`` for one run."""
+        import uuid
+
+        return SandboxSpec(
+            name=f"{prefix}-{uuid.uuid4().hex[:8]}", image=image, **overrides
+        )
+
+    async def _discard_one_shot(self, sb: SandboxHandle, name: str) -> None:
+        """Best-effort teardown of a one-shot sandbox: stop it, then remove it.
+
+        Any ``Exception`` raised by either step is logged and suppressed so
+        that cleanup never masks the result (or error) of the command that
+        was run. The removal is attempted even when the stop failed.
+        """
+        import logging
+
+        log = logging.getLogger(__name__)
+        try:
+            await sb.stop()
+        except Exception:  # noqa: BLE001 - cleanup must not raise
+            log.warning(
+                "failed to stop one-shot sandbox %r", name, exc_info=True
+            )
+        try:
+            await self.remove(name)
+        except Exception:  # noqa: BLE001 - cleanup must not raise
+            log.warning(
+                "failed to remove one-shot sandbox %r", name, exc_info=True
+            )
+
+    async def run_python(
+        self, code: str, *, image: str = "python:3.11-slim", **kwargs: Any
+    ) -> ExecResult:
+        """Convenience: spin a one-shot sandbox, run inline Python, tear down.
+
+        Roughly equivalent to creating a sandbox from
+        ``SandboxSpec(name=<generated>, image=image, **kwargs)``, calling
+        ``exec("python", ["-c", code])`` on it, then stopping and removing
+        it. Use the lower-level surface when you need persistence, multiple
+        commands, or non-Python tools.
+
+        Args:
+            code: Python source passed to ``python -c``.
+            image: OCI image to run the snippet in.
+            **kwargs: Extra :class:`SandboxSpec` fields (e.g. ``memory_mib``,
+                ``egress``). ``name`` is generated here, so passing it
+                raises ``TypeError``.
+
+        Returns:
+            The :class:`ExecResult` of the ``python -c`` invocation.
+        """
+        spec = self._one_shot_spec("py", image, kwargs)
+        sb = await self.create(spec)
+        try:
+            return await sb.exec("python", ["-c", code])
+        finally:
+            await self._discard_one_shot(sb, spec.name)
+
+    async def run_shell(
+        self,
+        script: str,
+        *,
+        image: str = "python:3.11-slim",
+        **kwargs: Any,
+    ) -> ExecResult:
+        """Convenience: spin a one-shot sandbox, run an arbitrary shell script,
+        tear down.
+
+        Pass ``image=`` to pick the toolchain (e.g. ``"node:20-slim"`` for
+        npm-based tools like codex, ``"rust:1-slim"`` for cargo,
+        ``"alpine/git"`` for plain git ops). The default ``python:3.11-slim``
+        ships bash and coreutils, but as a slim image it is not expected to
+        include extras such as curl or git — choose an image that bundles
+        the tools your script needs.
+
+        Args:
+            script: Shell script text handed to the sandbox's ``shell``.
+            image: OCI image to run the script in.
+            **kwargs: Extra :class:`SandboxSpec` fields (e.g. ``memory_mib``,
+                ``egress``). ``name`` is generated here, so passing it
+                raises ``TypeError``.
+
+        Returns:
+            The :class:`ExecResult` of the script.
+        """
+        spec = self._one_shot_spec("sh", image, kwargs)
+        sb = await self.create(spec)
+        try:
+            return await sb.shell(script)
+        finally:
+            await self._discard_one_shot(sb, spec.name)
+
+
+class SandboxUnavailable(RuntimeError):
+    """Raised when ``ctx.sandbox`` is accessed but no runtime is attached.
+
+    Subclasses :class:`RuntimeError`, so existing ``except RuntimeError``
+    handlers also catch it.
+    """