175 lines
5.4 KiB
Python
175 lines
5.4 KiB
Python
"""Code-execution sandbox surface available to agents via ``ctx.sandbox``.
|
|
|
|
The abstract :class:`SandboxClient` is what agent code programs against. The
|
|
runtime layer (host-side microsandbox + FUSE-mounted MinIO, in-cluster
|
|
DaemonSet, hosted SaaS) supplies a concrete implementation.
|
|
|
|
The sandbox is **general-purpose code execution**, not Python-only. Agents
|
|
can:
|
|
|
|
* run arbitrary shell pipelines: ``await ctx.sandbox.run_shell("git clone … && cargo build")``
|
|
* exec a binary with explicit args (no shell parsing): ``await sb.exec("/usr/bin/git", ["clone", url])``
|
|
* pick any OCI image: ``run_shell("npx @openai/codex …", image="node:20-slim")``
|
|
|
|
``run_python`` is just a convenience for the common Python-snippet case.
|
|
|
|
Why an abstract here when ``microsandbox`` itself already has a Python SDK?
|
|
The platform owns the *policy* layer — bucket selection, network egress,
|
|
write-path restrictions, resource caps, audit logging. Agents must depend on
|
|
the policy-respecting surface, not on the raw SDK, so the same agent code
|
|
runs unchanged across local dev / cluster / hosted environments.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from abc import ABC, abstractmethod
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Sequence
|
|
|
|
|
|
@dataclass(frozen=True)
class ExecResult:
    """Immutable record describing the outcome of one sandboxed command.

    Only ``stdout`` is required; the remaining fields default to an empty
    ``stderr``, exit code ``0`` and ``truncated=False``.
    """

    stdout: str
    stderr: str = ""
    exit_code: int = 0
    truncated: bool = False

    @property
    def output(self) -> str:
        """Return ``stdout`` immediately followed by ``stderr``.

        A falsy ``stderr`` (empty string or ``None``) contributes nothing.
        """
        trailing = self.stderr if self.stderr else ""
        return self.stdout + trailing

    @property
    def ok(self) -> bool:
        """Return ``True`` exactly when the exit code equals zero."""
        succeeded = self.exit_code == 0
        return succeeded
|
|
|
|
|
|
@dataclass(frozen=True)
class SandboxSpec:
    """Request payload a caller passes to :meth:`SandboxClient.create`.

    Attributes:
        name: Name of the sandbox (required; the only field without a default).
        image: Image to run; defaults to ``"python:3.11-slim"``.
        memory_mib: Memory setting, default ``512`` (presumably MiB, going by
            the field name — confirm against the runtime).
        cpus: CPU setting, default ``1``.
        workspace: When set, the runtime mounts this workspace at
            ``/workspace`` inside the VM (FUSE-backed where supported,
            snapshot bridge otherwise).
        secrets: Logical names the runtime should resolve to actual secrets
            and inject into the VM env. Values never appear in the CLI/API
            surface.
        egress: Hostname allowlist for outbound traffic; empty means deny all.
        labels: Free-form labels for audit / billing. Each instance gets its
            own fresh dict by default.
    """

    name: str
    image: str = "python:3.11-slim"
    memory_mib: int = 512
    cpus: int = 1
    workspace: str | None = None
    secrets: tuple[str, ...] = ()
    egress: tuple[str, ...] = ()
    labels: dict[str, str] = field(default_factory=dict)
|
|
|
|
|
|
class SandboxHandle(ABC):
    """Abstract handle onto a live, running sandbox VM.

    Runtimes subclass this and implement every coroutine below; the base
    class carries no behaviour of its own.
    """

    name: str

    @abstractmethod
    async def exec(
        self,
        cmd: str,
        args: Sequence[str] | None = None,
        *,
        timeout: float | None = None,
    ) -> ExecResult:
        """Run ``cmd`` with the explicit ``args`` list and return the result.

        ``timeout`` is keyword-only; its unit and the behaviour on expiry are
        left to the implementation (presumably seconds — confirm).
        """

    @abstractmethod
    async def shell(
        self, script: str, *, timeout: float | None = None
    ) -> ExecResult:
        """Run ``script`` as a shell script and return the result.

        ``timeout`` is keyword-only; semantics are implementation-defined.
        """

    @abstractmethod
    async def stop(self) -> None:
        """Stop the sandbox (exact semantics are up to the runtime)."""

    @abstractmethod
    async def kill(self) -> None:
        """Kill the sandbox (presumably a forceful stop — confirm)."""

    @abstractmethod
    async def logs(self, *, tail: int | None = None) -> str:
        """Return the sandbox logs as a string.

        ``tail`` presumably limits output to the last N lines — confirm
        against the concrete runtime.
        """
|
|
|
|
|
|
class SandboxClient(ABC):
    """Negotiation surface handed to agents via ``ctx.sandbox``.

    Concrete runtimes implement the four abstract lifecycle coroutines
    (``create`` / ``get`` / ``list`` / ``remove``). The ``run_python`` and
    ``run_shell`` conveniences are implemented here on top of them.
    """

    @abstractmethod
    async def create(self, spec: SandboxSpec) -> SandboxHandle:
        """Create the sandbox described by ``spec`` and return its handle."""

    @abstractmethod
    async def get(self, name: str) -> SandboxHandle:
        """Return the handle of the sandbox called ``name``.

        Behaviour for unknown names is implementation-defined.
        """

    @abstractmethod
    async def list(self) -> list[str]:
        """Return a list of strings (presumably sandbox names — confirm)."""

    @abstractmethod
    async def remove(self, name: str) -> None:
        """Remove the sandbox called ``name``."""

    async def run_python(
        self, code: str, *, image: str = "python:3.11-slim", **kwargs: Any
    ) -> ExecResult:
        """Convenience: spin a one-shot sandbox, run inline Python, tear down.

        Roughly equivalent to creating a sandbox from
        ``SandboxSpec(name=<generated>, image=image, **kwargs)`` and calling
        ``exec("python", ["-c", code])`` on its handle. Use the lower-level
        surface when you need persistence, multiple commands, or non-Python
        tools.

        Args:
            code: Python source passed to ``python -c``.
            image: Image for the throwaway sandbox.
            **kwargs: Extra :class:`SandboxSpec` fields (``name`` is generated
                here and must not be supplied).

        Returns:
            The :class:`ExecResult` of the ``python -c`` invocation.

        Raises:
            Whatever ``create`` or ``exec`` raise. Teardown failures are
            logged and never raised.
        """
        spec, sb = await self._one_shot_create("py", image, kwargs)
        try:
            return await sb.exec("python", ["-c", code])
        finally:
            await self._one_shot_teardown(sb, spec.name)

    async def run_shell(
        self,
        script: str,
        *,
        image: str = "python:3.11-slim",
        **kwargs: Any,
    ) -> ExecResult:
        """Convenience: spin a one-shot sandbox, run an arbitrary shell script,
        tear down.

        Pass ``image=`` to pick the toolchain (e.g. ``"node:20-slim"`` for
        npm-based tools like codex, ``"rust:1-slim"`` for cargo,
        ``"alpine/git"`` for plain git ops).

        NOTE(review): the previous wording said the default
        ``python:3.11-slim`` already has bash/coreutils/curl/git. Nothing in
        this module guarantees that, and the upstream slim image is not known
        to bundle curl or git — confirm what the runtime's image provides, or
        pass a fuller image when those tools are needed.

        Args:
            script: Shell script handed to the handle's ``shell`` method.
            image: Image for the throwaway sandbox.
            **kwargs: Extra :class:`SandboxSpec` fields (``name`` is generated
                here and must not be supplied).

        Returns:
            The :class:`ExecResult` of the script.

        Raises:
            Whatever ``create`` or ``shell`` raise. Teardown failures are
            logged and never raised.
        """
        spec, sb = await self._one_shot_create("sh", image, kwargs)
        try:
            return await sb.shell(script)
        finally:
            await self._one_shot_teardown(sb, spec.name)

    async def _one_shot_create(
        self, prefix: str, image: str, spec_kwargs: dict[str, Any]
    ) -> tuple[SandboxSpec, SandboxHandle]:
        """Build a uniquely named spec and create its sandbox."""
        import uuid

        # Eight hex chars of a random UUID keep names short but distinct.
        spec = SandboxSpec(
            name=f"{prefix}-{uuid.uuid4().hex[:8]}", image=image, **spec_kwargs
        )
        sb = await self.create(spec)
        return spec, sb

    async def _one_shot_teardown(self, sb: SandboxHandle, name: str) -> None:
        """Best-effort stop + remove; failures are logged, never raised."""
        import logging

        log = logging.getLogger(__name__)

        # Teardown must not mask the command's own result or exception, so
        # each step is isolated; a failed stop still attempts the remove.
        try:
            await sb.stop()
        except Exception:  # noqa: BLE001
            log.warning(
                "failed to stop one-shot sandbox %r", name, exc_info=True
            )
        try:
            await self.remove(name)
        except Exception:  # noqa: BLE001
            log.warning(
                "failed to remove one-shot sandbox %r", name, exc_info=True
            )
|
|
|
|
|
|
class SandboxUnavailable(RuntimeError):
    """Signals that ``ctx.sandbox`` was accessed with no runtime attached."""
|