From af9386f879a5fd42516c5a0e43f9793d5c707bee Mon Sep 17 00:00:00 2001 From: Devin Foley Date: Tue, 5 May 2026 08:21:37 -0700 Subject: [PATCH] Run a real command-v probe and source login profiles before exec in e2b sandboxes (#5279) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit > **Stacked PR.** Sits on top of #5278 (`e2b/stage-stdin-to-temp-file`) which ships the stdin-staging fix this builds on. The cumulative diff against `master` includes that PR's content; the files touched by *this* PR's commit are `packages/adapter-utils/src/execution-target.ts`, `packages/plugins/sandbox-providers/e2b/src/plugin.ts`, and `packages/plugins/sandbox-providers/e2b/src/plugin.test.ts`. ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - The adapter Test flow does an "is the command resolvable?" probe before running the hello probe so the report distinguishes "binary not installed" from "binary errored" > - For sandbox targets, that resolvability check was a no-op early-return — every sandboxed adapter test reported "Command is executable" regardless of whether the binary existed > - That made the resolvability check disagree with the hello probe in a way that looked like a PATH bug, when it was actually a missing CLI > - Separately, the e2b spawn used `sandbox.commands.run` with a non-login non-interactive shell whose PATH did not include npm-globals, nvm shims, or anything else the template installs via `.profile`/`.bashrc` > - This pull request makes the resolvability check honest by running a real `command -v` invocation through the sandbox runner, and aligns the e2b spawn with SSH by sourcing login profiles before `exec env KEY=val <command>` > - The benefit is the e2b sandbox spawn agrees with the hello probe and finds CLIs at template-installed paths ## What Changed - `packages/adapter-utils/src/execution-target.ts`: add `ensureSandboxCommandResolvable` that runs `command -v <command>` through the sandbox runner; 
replace the early-return in `ensureAdapterExecutionTargetCommandResolvable` for sandbox targets - `packages/plugins/sandbox-providers/e2b/src/plugin.ts`: replace `buildCommandLine` with `buildLoginShellScript` (sources `/etc/profile`, `~/.profile`, `~/.bash_profile`, `~/.bashrc`, `~/.zprofile`, and nvm.sh before `exec env KEY=val <command>`); env vars are interpolated inline so user-configured adapter env always wins over profile-exported values; drop the now-unused `envs:` SDK option - `plugin.test.ts` updated for the login-shell wrapping ## Verification - `pnpm vitest run --no-coverage --project @paperclipai/sandbox-e2b` — 17/17 plugin tests pass - `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils` clean - `pnpm typecheck` clean - Manual: previously every sandboxed adapter said "Command is executable" then the hello probe failed with "exec: not found". After this change, missing CLIs surface honestly at the resolvability step. SSH no-regression: SSH Claude probe still passes. ## Risks Medium — sandbox adapter Test reports will start failing at the resolvability step for environments where the CLI was never actually installed. This was always the real state; the previous "Command is executable" message was incorrect. Operators should expect previously-green-but-broken sandbox environments to report accurately. 
## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable — `plugin.test.ts` updated for the login-shell wrapping - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --- .../adapter-utils/src/execution-target.ts | 31 +++++++++++ .../sandbox-providers/e2b/src/plugin.test.ts | 36 ++++++------- .../sandbox-providers/e2b/src/plugin.ts | 54 +++++++++++++++++-- 3 files changed, 97 insertions(+), 24 deletions(-) diff --git a/packages/adapter-utils/src/execution-target.ts b/packages/adapter-utils/src/execution-target.ts index 398f4899..06445a2c 100644 --- a/packages/adapter-utils/src/execution-target.ts +++ b/packages/adapter-utils/src/execution-target.ts @@ -232,6 +232,7 @@ export async function ensureAdapterExecutionTargetCommandResolvable( env: NodeJS.ProcessEnv, ) { if (target?.kind === "remote" && target.transport === "sandbox") { + await ensureSandboxCommandResolvable(command, target); return; } await ensureCommandResolvable(command, cwd, env, { @@ -239,6 +240,36 @@ export async function ensureAdapterExecutionTargetCommandResolvable( }); } +async function ensureSandboxCommandResolvable( + command: string, + target: AdapterSandboxExecutionTarget, +): Promise { + // Probe whether the binary is resolvable inside the sandbox. We previously + // short-circuited this for sandbox targets, which let the caller report a + // success message even when the CLI was missing from the image. 
Now we run + // a real `command -v` through the same runner the hello probe will use, so + // the first step honestly reflects whether the binary is on PATH. The + // sandbox provider is responsible for sourcing login profiles (e2b mirrors + // SSH's buildSshSpawnTarget) so this and the hello probe agree on PATH. + const runner = requireSandboxRunner(target); + const probeScript = `command -v ${shellQuote(command)}`; + const result = await runner.execute({ + command: "sh", + args: ["-c", probeScript], + cwd: target.remoteCwd, + timeoutMs: target.timeoutMs ?? 15_000, + }); + if (result.timedOut) { + throw new Error(`Timed out checking command "${command}" on sandbox target.`); + } + if ((result.exitCode ?? 1) === 0) return; + const stderr = result.stderr.trim(); + const detail = stderr.length > 0 ? ` (${stderr})` : ""; + throw new Error( + `Command "${command}" is not installed or not on PATH in the sandbox environment${detail}.`, + ); +} + export async function resolveAdapterExecutionTargetCommandForLogs( command: string, target: AdapterExecutionTarget | null | undefined, diff --git a/packages/plugins/sandbox-providers/e2b/src/plugin.test.ts b/packages/plugins/sandbox-providers/e2b/src/plugin.test.ts index 71e36b91..99b881c8 100644 --- a/packages/plugins/sandbox-providers/e2b/src/plugin.test.ts +++ b/packages/plugins/sandbox-providers/e2b/src/plugin.test.ts @@ -303,17 +303,14 @@ describe("E2B sandbox provider plugin", () => { expect(mockConnect).toHaveBeenCalledWith("sandbox-123", expect.objectContaining({ apiKey: "resolved-key" })); expect(sandbox.files.write).toHaveBeenCalledWith(expect.stringMatching(/^\/tmp\/paperclip-stdin-/), "input"); - expect(sandbox.commands.run).toHaveBeenCalledWith(expect.stringMatching( - /^exec 'printf' 'hello' < '\/tmp\/paperclip-stdin-/, - ), expect.objectContaining({ - cwd: "/workspace", - envs: { FOO: "bar" }, - timeoutMs: 1000, - })); - expect(sandbox.commands.run).not.toHaveBeenCalledWith( - "exec 'printf' 'hello'", - 
expect.objectContaining({ background: true }), - ); + const stdinCall = sandbox.commands.run.mock.calls.find(([cmd]: [string]) => cmd.includes("'printf'")); + expect(stdinCall).toBeDefined(); + if (!stdinCall) throw new Error("stdinCall not found"); + expect(stdinCall[0]).toMatch(/\.profile/); + expect(stdinCall[0]).toMatch(/exec env FOO='bar' 'printf' 'hello' < '\/tmp\/paperclip-stdin-/); + expect(stdinCall[1]).toEqual(expect.objectContaining({ cwd: "/workspace", timeoutMs: 1000 })); + expect(stdinCall[1]).not.toHaveProperty("envs"); + expect(stdinCall[1]).not.toHaveProperty("background"); expect(sandbox.commands.sendStdin).not.toHaveBeenCalled(); expect(sandbox.commands.closeStdin).not.toHaveBeenCalled(); expect(sandbox.handle.wait).not.toHaveBeenCalled(); @@ -363,15 +360,14 @@ describe("E2B sandbox provider plugin", () => { timeoutMs: 1000, }); - expect(sandbox.commands.run).toHaveBeenCalledWith("exec 'printf' 'hello'", expect.objectContaining({ - cwd: "/workspace", - envs: { FOO: "bar" }, - timeoutMs: 1000, - })); - expect(sandbox.commands.run).not.toHaveBeenCalledWith( - "exec 'printf' 'hello'", - expect.objectContaining({ background: true }), - ); + const fgCall = sandbox.commands.run.mock.calls.find(([cmd]: [string]) => cmd.includes("'printf'")); + expect(fgCall).toBeDefined(); + if (!fgCall) throw new Error("fgCall not found"); + expect(fgCall[0]).toMatch(/\.profile/); + expect(fgCall[0]).toMatch(/exec env FOO='bar' 'printf' 'hello'$/); + expect(fgCall[1]).toEqual(expect.objectContaining({ cwd: "/workspace", timeoutMs: 1000 })); + expect(fgCall[1]).not.toHaveProperty("envs"); + expect(fgCall[1]).not.toHaveProperty("background"); expect(sandbox.commands.sendStdin).not.toHaveBeenCalled(); expect(sandbox.commands.closeStdin).not.toHaveBeenCalled(); expect(sandbox.handle.wait).not.toHaveBeenCalled(); diff --git a/packages/plugins/sandbox-providers/e2b/src/plugin.ts b/packages/plugins/sandbox-providers/e2b/src/plugin.ts index 142bcad9..daf15486 100644 --- 
a/packages/plugins/sandbox-providers/e2b/src/plugin.ts +++ b/packages/plugins/sandbox-providers/e2b/src/plugin.ts @@ -148,8 +148,48 @@ function shellQuote(value: string) { return `'${value.replace(/'/g, `'"'"'`)}'`; } -function buildCommandLine(command: string, args: string[] = []) { - return `exec ${[command, ...args].map(shellQuote).join(" ")}`; +function isValidShellEnvKey(value: string) { + return /^[A-Za-z_][A-Za-z0-9_]*$/.test(value); +} + +// Mirror SSH's buildSshSpawnTarget: source the user's login profiles (and nvm) +// before exec so commands run with the same PATH the user sees in an +// interactive shell. e2b's `sandbox.commands.run` otherwise spawns a +// non-login, non-interactive shell whose PATH does not include npm-globals, +// nvm shims, or anything else the template installs via .profile/.bashrc — +// which makes the hello probe fail with `exec: : not found` even when +// the binary is on disk. +function buildLoginShellScript(input: { + command: string; + args: string[]; + env?: Record; +}): string { + const env = input.env ?? {}; + for (const key of Object.keys(env)) { + if (!isValidShellEnvKey(key)) { + throw new Error(`Invalid sandbox environment variable key: ${key}`); + } + } + const envArgs = Object.entries(env) + .filter((entry): entry is [string, string] => typeof entry[1] === "string") + .map(([key, value]) => `${key}=${shellQuote(value)}`); + const commandParts = [shellQuote(input.command), ...input.args.map(shellQuote)].join(" "); + const execLine = envArgs.length > 0 + ? `exec env ${envArgs.join(" ")} ${commandParts}` + : `exec ${commandParts}`; + return [ + 'if [ -f /etc/profile ]; then . /etc/profile >/dev/null 2>&1 || true; fi', + 'if [ -f "$HOME/.profile" ]; then . 
"$HOME/.profile" >/dev/null 2>&1 || true; fi', + // .bash_profile typically sources .bashrc itself; only source .bashrc + // directly when no .bash_profile exists to avoid re-running idempotency- + // sensitive setup (nvm, PATH prepends) twice on templates that wire + // .bash_profile -> .bashrc. + 'if [ -f "$HOME/.bash_profile" ]; then . "$HOME/.bash_profile" >/dev/null 2>&1 || true; elif [ -f "$HOME/.bashrc" ]; then . "$HOME/.bashrc" >/dev/null 2>&1 || true; fi', + 'if [ -f "$HOME/.zprofile" ]; then . "$HOME/.zprofile" >/dev/null 2>&1 || true; fi', + 'export NVM_DIR="${NVM_DIR:-$HOME/.nvm}"', + '[ -s "$NVM_DIR/nvm.sh" ] && . "$NVM_DIR/nvm.sh" >/dev/null 2>&1 || true', + execLine, + ].join(" && "); } async function killSandboxBestEffort(sandbox: Sandbox, reason: string): Promise { @@ -351,7 +391,11 @@ const plugin = definePlugin({ const config = parseDriverConfig(params.config); const sandbox = await connectSandbox(config, params.lease.providerLeaseId); - const baseCommand = buildCommandLine(params.command, params.args); + const baseCommand = buildLoginShellScript({ + command: params.command, + args: params.args ?? [], + env: params.env, + }); const timeoutMs = params.timeoutMs ?? config.timeoutMs; // For commands with stdin, stage the payload to a temp file inside the @@ -379,9 +423,11 @@ const plugin = definePlugin({ : baseCommand; try { + // Env is interpolated into the script via `exec env KEY=val …` after + // profile sourcing so user-configured env wins over anything profiles + // export. No need to pass `envs:` separately. const result = await sandbox.commands.run(command, { cwd: params.cwd, - envs: params.env, timeoutMs, }) as Awaited> & { exitCode: number;