From af9386f879a5fd42516c5a0e43f9793d5c707bee Mon Sep 17 00:00:00 2001
From: Devin Foley <devin@devinfoley.com>
Date: Tue, 5 May 2026 08:21:37 -0700
Subject: [PATCH] Run a real command-v probe and source login profiles before
 exec in e2b sandboxes (#5279)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

> **Stacked PR.** Sits on top of #5278 (`e2b/stage-stdin-to-temp-file`)
which ships the stdin-staging fix this builds on. The cumulative diff
against `master` includes that PR's content; the files touched by *this*
PR's commit are `packages/adapter-utils/src/execution-target.ts`,
`packages/plugins/sandbox-providers/e2b/src/plugin.ts`, and
`packages/plugins/sandbox-providers/e2b/src/plugin.test.ts`.

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - The adapter Test flow does an "is the command resolvable?" probe
before running the hello probe so the report distinguishes "binary not
installed" from "binary errored"
> - For sandbox targets, that resolvability check was a no-op
early-return — every sandboxed adapter test reported "Command is
executable" regardless of whether the binary existed
> - That made the resolvability check disagree with the hello probe in a
way that looked like a PATH bug, when it was actually a missing CLI
> - Separately, the e2b spawn used `sandbox.commands.run` with a
non-login, non-interactive shell whose PATH did not include npm-globals,
nvm shims, or anything else the template installs via
`.profile`/`.bashrc`
> - This pull request makes the resolvability check honest by running a
real `command -v` invocation through the sandbox runner, and aligns the
e2b spawn with SSH by sourcing login profiles before `exec env KEY=val
<cmd>`
> - The benefit is the e2b sandbox spawn agrees with the hello probe and
finds CLIs at template-installed paths

## What Changed

- `packages/adapter-utils/src/execution-target.ts`: add
`ensureSandboxCommandResolvable` that runs `command -v <cli>` through
the sandbox runner; replace the early-return in
`ensureAdapterExecutionTargetCommandResolvable` for sandbox targets
- `packages/plugins/sandbox-providers/e2b/src/plugin.ts`: replace
`buildCommandLine` with `buildLoginShellScript` (sources `/etc/profile`,
`~/.profile`, `~/.bash_profile` — or `~/.bashrc` when no `.bash_profile`
exists — `~/.zprofile`, and nvm.sh before `exec env KEY=val <cmd>`); env
vars are interpolated inline so user-configured adapter env always wins
over profile-exported values; drop the now-unused `envs:` SDK option
- `plugin.test.ts` updated for the login-shell wrapping

## Verification

- `pnpm vitest run --no-coverage --project @paperclipai/sandbox-e2b` —
17/17 plugin tests pass
- `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils`
clean
- `pnpm typecheck` clean
- Manual: previously every sandboxed adapter reported "Command is
executable" and then the hello probe failed with "exec: not found".
After this change, missing CLIs surface honestly at the resolvability
step. SSH no-regression: the SSH Claude probe still passes.

## Risks

Medium — sandbox adapter Test reports will start failing at the
resolvability step for environments where the CLI was never actually
installed. This was always the real state; the previous "Command is
executable" message was incorrect. Operators should expect
previously-green-but-broken sandbox environments to report accurately.

## Model Used

Claude Opus 4.7 (1M context)

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable — `plugin.test.ts`
updated for the login-shell wrapping
- [x] If this change affects the UI, I have included before/after
screenshots — N/A (no UI)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
---
 .../adapter-utils/src/execution-target.ts     | 31 +++++++++++
 .../sandbox-providers/e2b/src/plugin.test.ts  | 36 ++++++-------
 .../sandbox-providers/e2b/src/plugin.ts       | 54 +++++++++++++++++--
 3 files changed, 97 insertions(+), 24 deletions(-)
diff --git a/packages/adapter-utils/src/execution-target.ts b/packages/adapter-utils/src/execution-target.ts
index 398f4899..06445a2c 100644
--- a/packages/adapter-utils/src/execution-target.ts
+++ b/packages/adapter-utils/src/execution-target.ts
@@ -232,6 +232,7 @@ export async function ensureAdapterExecutionTargetCommandResolvable(
   env: NodeJS.ProcessEnv,
 ) {
   if (target?.kind === "remote" && target.transport === "sandbox") {
+    await ensureSandboxCommandResolvable(command, target);
     return;
   }
   await ensureCommandResolvable(command, cwd, env, {
@@ -239,6 +240,36 @@ export async function ensureAdapterExecutionTargetCommandResolvable(
   });
 }
 
+async function ensureSandboxCommandResolvable(
+  command: string,
+  target: AdapterSandboxExecutionTarget,
+): Promise<void> {
+  // Probe whether the binary is resolvable inside the sandbox. We previously
+  // short-circuited this for sandbox targets, which let the caller report a
+  // success message even when the CLI was missing from the image. Now we run
+  // a real `command -v` through the same runner the hello probe will use, so
+  // the first step honestly reflects whether the binary is on PATH. The
+  // sandbox provider is responsible for sourcing login profiles (e2b mirrors
+  // SSH's buildSshSpawnTarget) so this and the hello probe agree on PATH.
+  const runner = requireSandboxRunner(target);
+  const probeScript = `command -v ${shellQuote(command)}`;
+  const result = await runner.execute({
+    command: "sh",
+    args: ["-c", probeScript],
+    cwd: target.remoteCwd,
+    timeoutMs: target.timeoutMs ?? 15_000,
+  });
+  if (result.timedOut) {
+    throw new Error(`Timed out checking command "${command}" on sandbox target.`);
+  }
+  if ((result.exitCode ?? 1) === 0) return;
+  const stderr = result.stderr.trim();
+  const detail = stderr.length > 0 ? ` (${stderr})` : "";
+  throw new Error(
+    `Command "${command}" is not installed or not on PATH in the sandbox environment${detail}.`,
+  );
+}
+
 export async function resolveAdapterExecutionTargetCommandForLogs(
   command: string,
   target: AdapterExecutionTarget | null | undefined,
diff --git a/packages/plugins/sandbox-providers/e2b/src/plugin.test.ts b/packages/plugins/sandbox-providers/e2b/src/plugin.test.ts
index 71e36b91..99b881c8 100644
--- a/packages/plugins/sandbox-providers/e2b/src/plugin.test.ts
+++ b/packages/plugins/sandbox-providers/e2b/src/plugin.test.ts
@@ -303,17 +303,14 @@ describe("E2B sandbox provider plugin", () => {
 
     expect(mockConnect).toHaveBeenCalledWith("sandbox-123", expect.objectContaining({ apiKey: "resolved-key" }));
     expect(sandbox.files.write).toHaveBeenCalledWith(expect.stringMatching(/^\/tmp\/paperclip-stdin-/), "input");
-    expect(sandbox.commands.run).toHaveBeenCalledWith(expect.stringMatching(
-      /^exec 'printf' 'hello' < '\/tmp\/paperclip-stdin-/,
-    ), expect.objectContaining({
-      cwd: "/workspace",
-      envs: { FOO: "bar" },
-      timeoutMs: 1000,
-    }));
-    expect(sandbox.commands.run).not.toHaveBeenCalledWith(
-      "exec 'printf' 'hello'",
-      expect.objectContaining({ background: true }),
-    );
+    const stdinCall = sandbox.commands.run.mock.calls.find(([cmd]: [string]) => cmd.includes("'printf'"));
+    expect(stdinCall).toBeDefined();
+    if (!stdinCall) throw new Error("stdinCall not found");
+    expect(stdinCall[0]).toMatch(/\.profile/);
+    expect(stdinCall[0]).toMatch(/exec env FOO='bar' 'printf' 'hello' < '\/tmp\/paperclip-stdin-/);
+    expect(stdinCall[1]).toEqual(expect.objectContaining({ cwd: "/workspace", timeoutMs: 1000 }));
+    expect(stdinCall[1]).not.toHaveProperty("envs");
+    expect(stdinCall[1]).not.toHaveProperty("background");
     expect(sandbox.commands.sendStdin).not.toHaveBeenCalled();
     expect(sandbox.commands.closeStdin).not.toHaveBeenCalled();
     expect(sandbox.handle.wait).not.toHaveBeenCalled();
@@ -363,15 +360,14 @@ describe("E2B sandbox provider plugin", () => {
       timeoutMs: 1000,
     });
 
-    expect(sandbox.commands.run).toHaveBeenCalledWith("exec 'printf' 'hello'", expect.objectContaining({
-      cwd: "/workspace",
-      envs: { FOO: "bar" },
-      timeoutMs: 1000,
-    }));
-    expect(sandbox.commands.run).not.toHaveBeenCalledWith(
-      "exec 'printf' 'hello'",
-      expect.objectContaining({ background: true }),
-    );
+    const fgCall = sandbox.commands.run.mock.calls.find(([cmd]: [string]) => cmd.includes("'printf'"));
+    expect(fgCall).toBeDefined();
+    if (!fgCall) throw new Error("fgCall not found");
+    expect(fgCall[0]).toMatch(/\.profile/);
+    expect(fgCall[0]).toMatch(/exec env FOO='bar' 'printf' 'hello'$/);
+    expect(fgCall[1]).toEqual(expect.objectContaining({ cwd: "/workspace", timeoutMs: 1000 }));
+    expect(fgCall[1]).not.toHaveProperty("envs");
+    expect(fgCall[1]).not.toHaveProperty("background");
     expect(sandbox.commands.sendStdin).not.toHaveBeenCalled();
     expect(sandbox.commands.closeStdin).not.toHaveBeenCalled();
     expect(sandbox.handle.wait).not.toHaveBeenCalled();
diff --git a/packages/plugins/sandbox-providers/e2b/src/plugin.ts b/packages/plugins/sandbox-providers/e2b/src/plugin.ts
index 142bcad9..daf15486 100644
--- a/packages/plugins/sandbox-providers/e2b/src/plugin.ts
+++ b/packages/plugins/sandbox-providers/e2b/src/plugin.ts
@@ -148,8 +148,48 @@ function shellQuote(value: string) {
   return `'${value.replace(/'/g, `'"'"'`)}'`;
 }
 
-function buildCommandLine(command: string, args: string[] = []) {
-  return `exec ${[command, ...args].map(shellQuote).join(" ")}`;
+function isValidShellEnvKey(value: string) {
+  return /^[A-Za-z_][A-Za-z0-9_]*$/.test(value);
+}
+
+// Mirror SSH's buildSshSpawnTarget: source the user's login profiles (and nvm)
+// before exec so commands run with the same PATH the user sees in an
+// interactive shell. e2b's `sandbox.commands.run` otherwise spawns a
+// non-login, non-interactive shell whose PATH does not include npm-globals,
+// nvm shims, or anything else the template installs via .profile/.bashrc —
+// which makes the hello probe fail with `exec: <cli>: not found` even when
+// the binary is on disk.
+function buildLoginShellScript(input: {
+  command: string;
+  args: string[];
+  env?: Record<string, string>;
+}): string {
+  const env = input.env ?? {};
+  for (const key of Object.keys(env)) {
+    if (!isValidShellEnvKey(key)) {
+      throw new Error(`Invalid sandbox environment variable key: ${key}`);
+    }
+  }
+  const envArgs = Object.entries(env)
+    .filter((entry): entry is [string, string] => typeof entry[1] === "string")
+    .map(([key, value]) => `${key}=${shellQuote(value)}`);
+  const commandParts = [shellQuote(input.command), ...input.args.map(shellQuote)].join(" ");
+  const execLine = envArgs.length > 0
+    ? `exec env ${envArgs.join(" ")} ${commandParts}`
+    : `exec ${commandParts}`;
+  return [
+    'if [ -f /etc/profile ]; then . /etc/profile >/dev/null 2>&1 || true; fi',
+    'if [ -f "$HOME/.profile" ]; then . "$HOME/.profile" >/dev/null 2>&1 || true; fi',
+    // .bash_profile typically sources .bashrc itself; only source .bashrc
+    // directly when no .bash_profile exists to avoid re-running idempotency-
+    // sensitive setup (nvm, PATH prepends) twice on templates that wire
+    // .bash_profile -> .bashrc.
+    'if [ -f "$HOME/.bash_profile" ]; then . "$HOME/.bash_profile" >/dev/null 2>&1 || true; elif [ -f "$HOME/.bashrc" ]; then . "$HOME/.bashrc" >/dev/null 2>&1 || true; fi',
+    'if [ -f "$HOME/.zprofile" ]; then . "$HOME/.zprofile" >/dev/null 2>&1 || true; fi',
+    'export NVM_DIR="${NVM_DIR:-$HOME/.nvm}"',
+    '[ -s "$NVM_DIR/nvm.sh" ] && . "$NVM_DIR/nvm.sh" >/dev/null 2>&1 || true',
+    execLine,
+  ].join(" && ");
 }
 
 async function killSandboxBestEffort(sandbox: Sandbox, reason: string): Promise<void> {
@@ -351,7 +391,11 @@ const plugin = definePlugin({
 
     const config = parseDriverConfig(params.config);
     const sandbox = await connectSandbox(config, params.lease.providerLeaseId);
-    const baseCommand = buildCommandLine(params.command, params.args);
+    const baseCommand = buildLoginShellScript({
+      command: params.command,
+      args: params.args ?? [],
+      env: params.env,
+    });
     const timeoutMs = params.timeoutMs ?? config.timeoutMs;
 
     // For commands with stdin, stage the payload to a temp file inside the
@@ -379,9 +423,11 @@ const plugin = definePlugin({
       : baseCommand;
 
     try {
+      // Env is interpolated into the script via `exec env KEY=val …` after
+      // profile sourcing so user-configured env wins over anything profiles
+      // export. No need to pass `envs:` separately.
       const result = await sandbox.commands.run(command, {
         cwd: params.cwd,
-        envs: params.env,
         timeoutMs,
       }) as Awaited<ReturnType<Sandbox["commands"]["run"]>> & {
         exitCode: number;