Wire per-adapter sandbox install commands through test and execute paths (#5280)

> **Stacked PR.** Sits on top of the e2b sandbox chain — #5278 (stdin staging) and #5279 (honest-resolvability + login-profiles). The cumulative diff against `master` includes both of those PRs' content; the files touched by *this* PR's commit are the new `maybeRunSandboxInstallCommand` helper in `packages/adapter-utils/src/execution-target.ts` and the per-adapter `index.ts`/`server/test.ts`/`server/execute.ts` wiring under `packages/adapters/{claude,codex,cursor,gemini,opencode,pi}-local/`. The honest resolvability check from #5279 is what gives this PR's install command a meaningful "did it actually land on PATH" follow-up.

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox execution targets are ephemeral — each fresh lease starts from a template image that may or may not have the agent CLIs preinstalled
> - When a CLI isn't preinstalled, the resolvability probe fails at `command -v` and the hello probe never runs
> - There's no shared mechanism for "before you probe or provision, install the CLI on this sandbox"
> - This pull request adds a `SANDBOX_INSTALL_COMMAND` constant per adapter and a `maybeRunSandboxInstallCommand` helper that runs it via the existing sandbox login shell, captures structured output, and never throws (so the resolvability + hello probe still run after); each adapter's `test()` and `execute()` share the constant so the two callsites can't drift
> - The benefit is a fresh sandbox lease without a preinstalled CLI now installs it once via `sh -lc` before the resolvability probe and before managed-runtime provisioning, with a uniform `<adapter>_install_command_run` check on the test report

## What Changed

- `packages/adapter-utils/src/execution-target.ts`: add `AdapterSandboxInstallCommandCheck` and `maybeRunSandboxInstallCommand` (runs the install via existing sandbox shell, captures exit/stdout/stderr, returns a structured info/warn check, never throws)
- Add `SANDBOX_INSTALL_COMMAND` to each
adapter's `index.ts` so `test()` and `execute()` share a single source of truth
- Wire each of the 6 affected adapter `testEnvironment()`s to call `maybeRunSandboxInstallCommand` before `ensureAdapterExecutionTargetCommandResolvable`
- Pass `installCommand: SANDBOX_INSTALL_COMMAND` through `prepareAdapterExecutionTargetRuntime` in each adapter's `execute()`
- Per-adapter install commands use npm globals where possible so binaries land on a PATH segment the template already exports:
  - claude → `npm install -g @anthropic-ai/claude-code`
  - codex → `npm install -g @openai/codex`
  - cursor → `curl https://cursor.com/install -fsS | bash`
  - gemini → `npm install -g @google/gemini-cli`
  - opencode → `npm install -g opencode-ai`
  - pi → `npm install -g @mariozechner/pi-coding-agent`

SSH and local targets ignore `installCommand` (SSH runtime takes no such param; local short-circuits before runtime prep), so this is a no-op for non-sandbox environments.

## Verification

- `pnpm typecheck` clean
- `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils` and per-adapter projects pass
- Manual sandbox matrix (claude, codex, cursor, gemini, opencode, pi) — each goes `install_command_run → resolvable → hello_probe_passed` (Codex and Pi land on `hello_probe_auth_required`, which is the configured-credentials problem, not an install issue)
- SSH no-regression: SSH Claude still passes; the helper short-circuits on non-sandbox targets

## Risks

Medium — adds a network/CPU cost (npm install / curl) on every fresh sandbox lease. Cost is bounded (one-time per lease, typically tens of seconds for npm globals), and the helper never throws so a failing install still lets the report run resolvability and hello probes. If a sandbox image already has the CLI, the install is an idempotent reinstall.
## Model Used

Claude Opus 4.7 (1M context)

## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before requesting merge
2026-06-14 01:50:39 +09:00 · 2026-05-05 08:29:28 -07:00 · 2026-05-05 08:29:28 -07:00 · 9578dc3da7
commit 9578dc3da7
parent af9386f879
20 changed files with 355 additions and 29 deletions
--- a/packages/adapter-utils/src/command-managed-runtime.ts
+++ b/packages/adapter-utils/src/command-managed-runtime.ts
@ -144,6 +144,8 @@ export async function prepareCommandManagedRuntime(input: {
  preserveAbsentOnRestore?: string[];
  assets?: CommandManagedRuntimeAsset[];
  installCommand?: string | null;
+  /** When provided alongside `installCommand`, skip the install if `command -v <detectCommand>` succeeds. */
+  detectCommand?: string | null;
 }): Promise<PreparedSandboxManagedRuntime> {
  const timeoutMs = input.spec.timeoutMs && input.spec.timeoutMs > 0 ? input.spec.timeoutMs : 300_000;
  const workspaceRemoteDir = input.workspaceRemoteDir ?? input.spec.remoteCwd;
@ -164,13 +166,52 @@ export async function prepareCommandManagedRuntime(input: {
  const shellCommand = preferredShellForSandbox(input.spec.shellCommand);

  if (input.installCommand?.trim()) {
+    const installCommand = input.installCommand.trim();
+    const detectCommand = input.detectCommand?.trim();
+    // Skip the install when the binary is already on PATH. Without this
+    // probe the install runs unconditionally on every execute() call (and
+    // also runs a second time after `ensureAdapterExecutionTargetCommandResolvable`
+    // has already installed it during the resolvability gate).
+    if (detectCommand) {
+      const probe = await input.runner.execute({
+        command: shellCommand,
+        args: ["-lc", `command -v ${shellQuote(detectCommand)} >/dev/null 2>&1`],
+        cwd: workspaceRemoteDir,
+        timeoutMs,
+      });
+      if (!probe.timedOut && (probe.exitCode ?? 1) === 0) {
+        return await prepareSandboxManagedRuntime({
+          spec: runtimeSpec,
+          client,
+          adapterKey: input.adapterKey,
+          workspaceLocalDir: input.workspaceLocalDir,
+          workspaceRemoteDir,
+          workspaceExclude: mergeRuntimeExcludes(input.workspaceExclude),
+          preserveAbsentOnRestore: input.preserveAbsentOnRestore,
+          assets: input.assets,
+        });
+      }
+    }
    const result = await input.runner.execute({
      command: shellCommand,
-      args: ["-lc", input.installCommand.trim()],
+      args: ["-lc", installCommand],
      cwd: workspaceRemoteDir,
      timeoutMs,
    });
-    requireSuccessfulResult(result, input.installCommand.trim());
+    // A failed install is not always fatal: the CLI may already be on PATH
+    // from a previous lease, the template image, or another path entry. Log
+    // and continue rather than aborting the agent run; downstream code that
+    // exec's the CLI will surface a clear "command not found" if it is in
+    // fact missing. The test path's `maybeRunSandboxInstallCommand` already
+    // honors this contract — keep them consistent.
+    if (result.timedOut || (result.exitCode ?? 0) !== 0) {
+      const tail = (text: string) =>
+        text.split(/\r?\n/).filter((line) => line.trim().length > 0).slice(-3).join(" | ").slice(0, 480);
+      const reason = result.timedOut ? "timed out" : `exited ${result.exitCode ?? "?"}`;
+      console.warn(
+        `[paperclip] managed-runtime install command ${reason}: ${installCommand} :: ${tail(result.stderr || result.stdout)}`,
+      );
+    }
  }

  return await prepareSandboxManagedRuntime({
--- a/packages/adapter-utils/src/execution-target.ts
+++ b/packages/adapter-utils/src/execution-target.ts
@ -230,9 +230,10 @@ export async function ensureAdapterExecutionTargetCommandResolvable(
  target: AdapterExecutionTarget | null | undefined,
  cwd: string,
  env: NodeJS.ProcessEnv,
+  options: { installCommand?: string | null } = {},
 ) {
  if (target?.kind === "remote" && target.transport === "sandbox") {
-    await ensureSandboxCommandResolvable(command, target);
+    await ensureSandboxCommandResolvable(command, target, options.installCommand?.trim() || null);
    return;
  }
  await ensureCommandResolvable(command, cwd, env, {
@ -240,17 +241,10 @@ export async function ensureAdapterExecutionTargetCommandResolvable(
  });
 }

-async function ensureSandboxCommandResolvable(
+async function probeSandboxCommandResolvable(
  command: string,
  target: AdapterSandboxExecutionTarget,
-): Promise<void> {
-  // Probe whether the binary is resolvable inside the sandbox. We previously
-  // short-circuited this for sandbox targets, which let the caller report a
-  // success message even when the CLI was missing from the image. Now we run
-  // a real `command -v` through the same runner the hello probe will use, so
-  // the first step honestly reflects whether the binary is on PATH. The
-  // sandbox provider is responsible for sourcing login profiles (e2b mirrors
-  // SSH's buildSshSpawnTarget) so this and the hello probe agree on PATH.
+): Promise<{ resolved: boolean; timedOut: boolean; stderr: string }> {
  const runner = requireSandboxRunner(target);
  const probeScript = `command -v ${shellQuote(command)}`;
  const result = await runner.execute({
@ -259,14 +253,67 @@ async function ensureSandboxCommandResolvable(
    cwd: target.remoteCwd,
    timeoutMs: target.timeoutMs ?? 15_000,
  });
-  if (result.timedOut) {
+  return {
+    resolved: !result.timedOut && (result.exitCode ?? 1) === 0,
+    timedOut: result.timedOut,
+    stderr: result.stderr.trim(),
+  };
+}
+
+async function ensureSandboxCommandResolvable(
+  command: string,
+  target: AdapterSandboxExecutionTarget,
+  installCommand: string | null,
+): Promise<void> {
+  // Probe whether the binary is resolvable inside the sandbox. We previously
+  // short-circuited this for sandbox targets, which let the caller report a
+  // success message even when the CLI was missing from the image. Now we run
+  // a real `command -v` through the same runner the hello probe will use, so
+  // the first step honestly reflects whether the binary is on PATH. The
+  // sandbox provider is responsible for sourcing login profiles (e2b mirrors
+  // SSH's buildSshSpawnTarget) so this and the hello probe agree on PATH.
+  let probe = await probeSandboxCommandResolvable(command, target);
+  if (probe.resolved) return;
+  if (probe.timedOut) {
    throw new Error(`Timed out checking command "${command}" on sandbox target.`);
  }
-  if ((result.exitCode ?? 1) === 0) return;
-  const stderr = result.stderr.trim();
-  const detail = stderr.length > 0 ? ` (${stderr})` : "";
+
+  // If the caller supplied an install command, attempt the install once via
+  // the sandbox runner (which the sandbox provider wraps in a login shell)
+  // and re-probe before reporting failure. This lets fresh sandbox leases
+  // bring up the CLI before the resolvability gate, mirroring the test path.
+  let installFailureDetail: string | null = null;
+  if (installCommand) {
+    const runner = requireSandboxRunner(target);
+    try {
+      const installResult = await runner.execute({
+        command: "sh",
+        args: ["-lc", installCommand],
+        cwd: target.remoteCwd,
+        timeoutMs: target.timeoutMs ?? 300_000,
+      });
+      if (installResult.timedOut) {
+        installFailureDetail = `install command timed out: ${installCommand}`;
+      } else if ((installResult.exitCode ?? 0) !== 0) {
+        const tail = (text: string) =>
+          text.split(/\r?\n/).filter((line) => line.trim().length > 0).slice(-2).join(" | ").slice(0, 240);
+        const reason = tail(installResult.stderr || installResult.stdout) || `exit ${installResult.exitCode ?? "?"}`;
+        installFailureDetail = `install command exited ${installResult.exitCode ?? "?"}: ${reason}`;
+      }
+    } catch (err) {
+      installFailureDetail = `install command threw: ${err instanceof Error ? err.message : String(err)}`;
+    }
+    probe = await probeSandboxCommandResolvable(command, target);
+    if (probe.resolved) return;
+    if (probe.timedOut) {
+      throw new Error(`Timed out checking command "${command}" on sandbox target.`);
+    }
+  }
+
+  const probeStderr = probe.stderr.length > 0 ? ` probe stderr: ${probe.stderr}` : "";
+  const installDetail = installFailureDetail ? `; ${installFailureDetail}` : "";
  throw new Error(
-    `Command "${command}" is not installed or not on PATH in the sandbox environment${detail}.`,
+    `Command "${command}" is not installed or not on PATH in the sandbox environment${installDetail}.${probeStderr}`,
  );
 }

@ -409,6 +456,111 @@ export async function runAdapterExecutionTargetShellCommand(
  );
 }

+/**
+ * Structured outcome of a best-effort sandbox install attempt, as returned by
+ * `maybeRunSandboxInstallCommand`.
+ */
+export interface AdapterSandboxInstallCommandCheck {
+  /** Check identifier; `maybeRunSandboxInstallCommand` sets it to `<adapterKey>_install_command_run`. */
+  code: string;
+  /**
+   * Severity of the outcome. `maybeRunSandboxInstallCommand` emits "info" when
+   * the install ran cleanly or was skipped, and "warn" when it threw, timed
+   * out, or exited non-zero; it does not emit "error".
+   */
+  level: "info" | "warn" | "error";
+  /** One-line summary of what happened (e.g. which command ran or why it was skipped). */
+  message: string;
+  /** Optional supporting text: the tail of the command output, or a thrown error's message. */
+  detail?: string;
+  /** Optional remediation hint. NOTE(review): no producer in this file's visible code sets it — confirm intended use. */
+  hint?: string;
+}
+
+// Best-effort run of an adapter-supplied install command on a sandbox target
+// before the resolvability + hello probe. Returns null for non-sandbox targets
+// or a blank command so callers can no-op; otherwise returns a structured
+// `<adapterKey>_install_command_run` check ("info" when the install ran or was
+// skipped because `detectCommand` resolved, "warn" on throw/timeout/non-zero
+// exit) and never throws, so the rest of the test report still runs.
+export async function maybeRunSandboxInstallCommand(input: {
+  runId: string;
+  target: AdapterExecutionTarget | null | undefined;
+  adapterKey: string;
+  installCommand: string;
+  /** When provided, skip the install if `command -v <detectCommand>` succeeds. */
+  detectCommand?: string | null;
+  env?: Record<string, string>;
+  timeoutSec?: number;
+}): Promise<AdapterSandboxInstallCommandCheck | null> {
+  const { target, adapterKey, installCommand } = input;
+  if (!target || target.kind !== "remote" || target.transport !== "sandbox") {
+    return null;
+  }
+  const trimmed = installCommand.trim();
+  if (trimmed.length === 0) return null;
+
+  const code = `${adapterKey}_install_command_run`;
+
+  // Skip install when the binary is already on PATH. Avoids running
+  // network-dependent installers (e.g. `curl ... | bash`) on every test
+  // probe when the CLI is preinstalled on the lease/template.
+  const detectCommand = input.detectCommand?.trim();
+  if (detectCommand) {
+    try {
+      const probe = await runAdapterExecutionTargetShellCommand(
+        input.runId,
+        target,
+        `command -v ${shellQuote(detectCommand)} >/dev/null 2>&1`,
+        {
+          cwd: target.remoteCwd,
+          env: input.env ?? {},
+          timeoutSec: 30,
+          graceSec: 5,
+        },
+      );
+      if (!probe.timedOut && probe.exitCode === 0) {
+        return {
+          code,
+          level: "info",
+          message: `${detectCommand} already on PATH; skipped install.`,
+        };
+      }
+    } catch {
+      // Probe failure is not a reason to skip the install; fall through.
+    }
+  }
+
+  let result;
+  try {
+    result = await runAdapterExecutionTargetShellCommand(input.runId, target, trimmed, {
+      cwd: target.remoteCwd,
+      env: input.env ?? {},
+      timeoutSec: input.timeoutSec ?? 240,
+      graceSec: 10,
+    });
+  } catch (err) {
+    return {
+      code,
+      level: "warn",
+      message: "Install command threw before completion.",
+      detail: err instanceof Error ? err.message : String(err),
+    };
+  }
+  const tail = (text: string) =>
+    text.split(/\r?\n/).filter((line) => line.trim().length > 0).slice(-3).join(" | ").slice(0, 480);
+  if (result.timedOut) {
+    return {
+      code,
+      level: "warn",
+      message: `Install command timed out: ${trimmed}`,
+      detail: tail(result.stderr || result.stdout),
+    };
+  }
+  if ((result.exitCode ?? 1) === 0) {
+    return {
+      code,
+      level: "info",
+      message: `Install command ran: ${trimmed}`,
+      ...(tail(result.stdout) ? { detail: tail(result.stdout) } : {}),
+    };
+  }
+  return {
+    code,
+    level: "warn",
+    // A missing exit code renders as "?" rather than the literal "null".
+    message: `Install command exited ${result.exitCode ?? "?"}: ${trimmed}`,
+    detail: tail(result.stderr || result.stdout),
+  };
+}
+
 export async function readAdapterExecutionTargetHomeDir(
  runId: string,
  target: AdapterExecutionTarget | null | undefined,
@ -470,12 +622,43 @@ export async function ensureAdapterExecutionTargetRuntimeCommandInstalled(input:
      onLog: input.onLog,
    },
  );
+
+  // A failed or timed-out install is not necessarily fatal: the CLI may already
+  // be on PATH from a previous lease's install, the template image, or another
+  // path entry. Re-run the detect probe (when one is configured) so a transient
+  // install failure does not abort the agent run when the binary is reachable.
+  const installFailed = result.timedOut || (result.exitCode ?? 0) !== 0;
+  if (!installFailed) {
+    return;
+  }
+  if (detectCommand) {
+    const recheck = await runAdapterExecutionTargetShellCommand(
+      input.runId,
+      input.target,
+      `command -v ${shellQuote(detectCommand)} >/dev/null 2>&1`,
+      {
+        cwd: input.cwd,
+        env: input.env,
+        timeoutSec: input.timeoutSec,
+        graceSec: input.graceSec,
+      },
+    );
+    if (!recheck.timedOut && recheck.exitCode === 0) {
+      if (input.onLog) {
+        const reason = result.timedOut ? "timed out" : `exited ${result.exitCode ?? "?"}`;
+        await input.onLog(
+          "stderr",
+          `[paperclip] Install command ${reason} (${installCommand}) but ${detectCommand} is on PATH; continuing.\n`,
+        );
+      }
+      return;
+    }
+  }
+
  if (result.timedOut) {
    throw new Error(`Timed out while installing the adapter runtime command via: ${installCommand}`);
  }
-  if ((result.exitCode ?? 0) !== 0) {
-    throw new Error(`Failed to install the adapter runtime command via: ${installCommand}`);
-  }
+  throw new Error(`Failed to install the adapter runtime command via: ${installCommand}`);
 }

 export async function ensureAdapterExecutionTargetFile(
@ -666,6 +849,8 @@ export async function prepareAdapterExecutionTargetRuntime(input: {
  preserveAbsentOnRestore?: string[];
  assets?: AdapterManagedRuntimeAsset[];
  installCommand?: string | null;
+  /** When provided alongside `installCommand`, skip the install if the binary is already on PATH. */
+  detectCommand?: string | null;
 }): Promise<PreparedAdapterExecutionTargetRuntime> {
  const target = input.target ?? { kind: "local" as const };
  if (target.kind === "local") {
@ -707,6 +892,7 @@ export async function prepareAdapterExecutionTargetRuntime(input: {
    preserveAbsentOnRestore: input.preserveAbsentOnRestore,
    assets: input.assets,
    installCommand: input.installCommand,
+    detectCommand: input.detectCommand,
  });
  return {
    target,