mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-14 01:50:39 +09:00
> **Stacked PR.** Sits on top of the e2b sandbox chain — #5278 (stdin staging) and #5279 (honest-resolvability + login-profiles). The cumulative diff against `master` includes both of those PRs' content; the files touched by *this* PR's commit are the new `maybeRunSandboxInstallCommand` helper in `packages/adapter-utils/src/execution-target.ts` and the per-adapter `index.ts`/`server/test.ts`/`server/execute.ts` wiring under `packages/adapters/{claude,codex,cursor,gemini,opencode,pi}-local/`. The honest resolvability check from #5279 is what gives this PR's install command a meaningful "did it actually land on PATH" follow-up.

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox execution targets are ephemeral — each fresh lease starts from a template image that may or may not have the agent CLIs preinstalled
> - When a CLI isn't preinstalled, the resolvability probe fails at `command -v` and the hello probe never runs
> - There's no shared mechanism for "before you probe or provision, install the CLI on this sandbox"
> - This pull request adds a `SANDBOX_INSTALL_COMMAND` constant per adapter and a `maybeRunSandboxInstallCommand` helper that runs it via the existing sandbox login shell, captures structured output, and never throws (so the resolvability + hello probe still run after); each adapter's `test()` and `execute()` share the constant so the two callsites can't drift
> - The benefit is a fresh sandbox lease without a preinstalled CLI now installs it once via `sh -lc` before the resolvability probe and before managed-runtime provisioning, with a uniform `<adapter>_install_command_run` check on the test report

## What Changed

- `packages/adapter-utils/src/execution-target.ts`: add `AdapterSandboxInstallCommandCheck` and `maybeRunSandboxInstallCommand` (runs the install via existing sandbox shell, captures exit/stdout/stderr, returns a structured info/warn check, never throws)
- Add `SANDBOX_INSTALL_COMMAND` to each adapter's `index.ts` so `test()` and `execute()` share a single source of truth
- Wire each of the 6 affected adapter `testEnvironment()`s to call `maybeRunSandboxInstallCommand` before `ensureAdapterExecutionTargetCommandResolvable`
- Pass `installCommand: SANDBOX_INSTALL_COMMAND` through `prepareAdapterExecutionTargetRuntime` in each adapter's `execute()`
- Per-adapter install commands use npm globals where possible so binaries land on a PATH segment the template already exports:
  - claude → `npm install -g @anthropic-ai/claude-code`
  - codex → `npm install -g @openai/codex`
  - cursor → `curl https://cursor.com/install -fsS | bash`
  - gemini → `npm install -g @google/gemini-cli`
  - opencode → `npm install -g opencode-ai`
  - pi → `npm install -g @mariozechner/pi-coding-agent`

SSH and local targets ignore `installCommand` (SSH runtime takes no such param; local short-circuits before runtime prep), so this is a no-op for non-sandbox environments.

## Verification

- `pnpm typecheck` clean
- `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils` and per-adapter projects pass
- Manual sandbox matrix (claude, codex, cursor, gemini, opencode, pi) — each goes `install_command_run → resolvable → hello_probe_passed` (Codex and Pi land on `hello_probe_auth_required`, which is the configured-credentials problem, not an install issue)
- SSH no-regression: SSH Claude still passes; the helper short-circuits on non-sandbox targets

## Risks

Medium — adds a network/CPU cost (npm install / curl) on every fresh sandbox lease. Cost is bounded (one-time per lease, typically tens of seconds for npm globals), and the helper never throws so a failing install still lets the report run resolvability and hello probes. If a sandbox image already has the CLI, the install is an idempotent reinstall.

## Model Used

Claude Opus 4.7 (1M context)

## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before requesting merge
227 lines
8.4 KiB
TypeScript
227 lines
8.4 KiB
TypeScript
import path from "node:path";
|
|
import {
|
|
prepareSandboxManagedRuntime,
|
|
type PreparedSandboxManagedRuntime,
|
|
type SandboxManagedRuntimeAsset,
|
|
type SandboxManagedRuntimeClient,
|
|
type SandboxRemoteExecutionSpec,
|
|
} from "./sandbox-managed-runtime.js";
|
|
import { preferredShellForSandbox } from "./sandbox-shell.js";
|
|
import type { RunProcessResult } from "./server-utils.js";
|
|
|
|
/**
 * Process-execution capability that the managed-runtime helpers in this file
 * are built on: something able to run one command and report its result.
 */
export interface CommandManagedRuntimeRunner {
  /**
   * Runs a single command and resolves with its `RunProcessResult`.
   *
   * The helpers in this file always invoke it as `<shell> -lc <script>` and
   * only ever supply `command`, `args`, `cwd`, `stdin` and `timeoutMs`; the
   * `env`, `onLog` and `onSpawn` fields are part of the contract but are not
   * used here.
   */
  execute(input: {
    command: string;
    args?: string[];
    cwd?: string;
    env?: Record<string, string>;
    stdin?: string;
    timeoutMs?: number;
    onLog?: (stream: "stdout" | "stderr", chunk: string) => Promise<void>;
    onSpawn?: (meta: { pid: number; startedAt: string }) => Promise<void>;
  }): Promise<RunProcessResult>;
}
|
|
|
|
/**
 * Describes the sandbox that `prepareCommandManagedRuntime` should target.
 */
export interface CommandManagedRuntimeSpec {
  /** Provider name recorded on the runtime spec; `"sandbox"` is used when absent. */
  providerKey?: string | null;
  /** Shell preference handed to `preferredShellForSandbox`. */
  shellCommand?: "bash" | "sh" | null;
  /** Lease identifier recorded as the sandbox id; `"managed"` is used when absent. */
  leaseId?: string | null;
  /** Remote working directory; also the workspace directory unless one is given explicitly. */
  remoteCwd: string;
  /** Per-command timeout in milliseconds; missing or non-positive values fall back to 300000. */
  timeoutMs?: number | null;
}
|
|
|
|
/** Asset descriptor accepted by `prepareCommandManagedRuntime`; a plain alias of `SandboxManagedRuntimeAsset`. */
export type CommandManagedRuntimeAsset = SandboxManagedRuntimeAsset;
|
|
|
|
/**
 * Wraps `value` in single quotes so a POSIX shell treats it as one literal
 * word. Each embedded single quote is rewritten as `'"'"'` (close the quote,
 * emit a double-quoted `'`, reopen the quote).
 */
function shellQuote(value: string): string {
  const escaped = value.split("'").join(`'"'"'`);
  return "'" + escaped + "'";
}
|
|
|
|
/**
 * Returns the exclude list with `.paperclip-runtime` always placed first,
 * followed by the caller's entries in their original order with duplicates
 * removed.
 */
function mergeRuntimeExcludes(entries: string[] | undefined): string[] {
  const merged = new Set<string>([".paperclip-runtime"]);
  for (const entry of entries ?? []) {
    merged.add(entry);
  }
  return Array.from(merged);
}
|
|
|
|
// Upper bound on how many base64 characters `writeFile` appends to the remote
// temp file per shell invocation.
// NOTE(review): presumably sized to keep each command line under shell/OS
// argument-length limits — confirm.
const REMOTE_WRITE_BASE64_CHUNK_SIZE = 32 * 1024;
|
|
|
|
/**
 * Normalises the supported binary inputs to a Node `Buffer`.
 *
 * A `Buffer` is returned as-is. An `ArrayBuffer` or typed-array view is wrapped
 * over the same underlying memory, honouring the view's offset and length.
 */
function toBuffer(bytes: Buffer | Uint8Array | ArrayBuffer): Buffer {
  if (Buffer.isBuffer(bytes)) {
    return bytes;
  }
  return bytes instanceof ArrayBuffer
    ? Buffer.from(bytes)
    : Buffer.from(bytes.buffer, bytes.byteOffset, bytes.byteLength);
}
|
|
|
|
/**
 * Throws unless `result` represents a clean run (exit code 0 and no timeout).
 *
 * @param result - Outcome of a runner invocation.
 * @param action - Label for the attempted operation; it prefixes the error message.
 * @throws Error of the form `<action> failed with exit code <code>[ (timed out)][: <stderr>]`.
 */
function requireSuccessfulResult(result: RunProcessResult, action: string): void {
  if (result.exitCode === 0 && !result.timedOut) return;
  const stderr = result.stderr.trim();
  const detail = stderr.length > 0 ? `: ${stderr}` : "";
  // A timeout can surface with exit code 0 or null; without this marker the
  // message would give no hint that the command was cut off by the deadline.
  const timeoutNote = result.timedOut ? " (timed out)" : "";
  throw new Error(`${action} failed with exit code ${result.exitCode ?? "null"}${timeoutNote}${detail}`);
}
|
|
|
|
/**
 * Builds a `SandboxManagedRuntimeClient` whose file and command operations are
 * all carried out by running `<shell> -lc <script>` through `input.runner`
 * with `input.remoteCwd` as the working directory.
 *
 * Every operation throws (via `requireSuccessfulResult`) when its command
 * exits non-zero or times out.
 *
 * @param input.runner - Executes the shell commands.
 * @param input.remoteCwd - Working directory for every command.
 * @param input.timeoutMs - Timeout applied to the file operations and `remove`.
 * @param input.shellCommand - Shell preference resolved by `preferredShellForSandbox`.
 */
export function createCommandManagedRuntimeClient(input: {
  runner: CommandManagedRuntimeRunner;
  remoteCwd: string;
  timeoutMs: number;
  shellCommand?: "bash" | "sh" | null;
}): SandboxManagedRuntimeClient {
  const shell = preferredShellForSandbox(input.shellCommand);

  // Single place that shapes the runner call; `extra` supplies the per-call
  // stdin/timeout fields so each operation passes exactly the keys it needs.
  const launch = (script: string, extra: { stdin?: string; timeoutMs?: number }) =>
    input.runner.execute({
      command: shell,
      args: ["-lc", script],
      cwd: input.remoteCwd,
      ...extra,
    });

  // Runs a script and fails loudly, using the script text itself as the error label.
  const runScript = async (script: string, opts: { stdin?: string; timeoutMs?: number } = {}) => {
    const outcome = await launch(script, {
      stdin: opts.stdin,
      timeoutMs: opts.timeoutMs ?? input.timeoutMs,
    });
    requireSuccessfulResult(outcome, script);
    return outcome;
  };

  return {
    async makeDir(remotePath) {
      await runScript(`mkdir -p ${shellQuote(remotePath)}`);
    },

    // Uploads by staging base64 text in a sibling temp file, one bounded chunk
    // per command, and decoding it into place at the end.
    async writeFile(remotePath, bytes) {
      const encoded = toBuffer(bytes).toString("base64");
      const quotedDir = shellQuote(path.posix.dirname(remotePath));
      const quotedStaging = shellQuote(`${remotePath}.paperclip-upload.b64`);

      await runScript(`mkdir -p ${quotedDir} && rm -f ${quotedStaging} && : > ${quotedStaging}`);

      let cursor = 0;
      while (cursor < encoded.length) {
        const piece = encoded.slice(cursor, cursor + REMOTE_WRITE_BASE64_CHUNK_SIZE);
        await runScript(`printf '%s' ${shellQuote(piece)} >> ${quotedStaging}`);
        cursor += REMOTE_WRITE_BASE64_CHUNK_SIZE;
      }

      await runScript(
        `base64 -d < ${quotedStaging} > ${shellQuote(remotePath)} && rm -f ${quotedStaging}`,
      );
    },

    async readFile(remotePath) {
      const { stdout } = await runScript(`base64 < ${shellQuote(remotePath)}`);
      // Strip the line wrapping that `base64` adds before decoding.
      return Buffer.from(stdout.replace(/\s+/g, ""), "base64");
    },

    // Lists the names of regular files directly inside `remotePath`, sorted;
    // a path that is not a directory yields an empty list.
    async listFiles(remotePath) {
      const quotedDir = shellQuote(remotePath);
      const script = [
        `if [ -d ${quotedDir} ]; then `,
        `for entry in ${quotedDir}/*; do `,
        `[ -f "$entry" ] || continue; `,
        `basename "$entry"; `,
        `done; `,
        `fi`,
      ].join("");
      const { stdout } = await runScript(script);
      const names: string[] = [];
      for (const line of stdout.split(/\r?\n/)) {
        const name = line.trim();
        if (name.length > 0) names.push(name);
      }
      return names.sort((left, right) => left.localeCompare(right));
    },

    async remove(remotePath) {
      const outcome = await launch(`rm -rf ${shellQuote(remotePath)}`, {
        timeoutMs: input.timeoutMs,
      });
      requireSuccessfulResult(outcome, `remove ${remotePath}`);
    },

    // Runs a caller-supplied command with the caller's own timeout.
    async run(command, options) {
      const outcome = await launch(command, { timeoutMs: options.timeoutMs });
      requireSuccessfulResult(outcome, command);
    },
  };
}
|
|
|
|
/**
 * Prepares a sandbox managed runtime on top of a command runner.
 *
 * Steps, in order:
 * 1. Resolve the effective timeout (`spec.timeoutMs` when positive, else
 *    300000 ms) and the remote workspace directory.
 * 2. If `installCommand` is non-blank, optionally probe `command -v
 *    <detectCommand>` and run the install only when the probe does not
 *    succeed. A failing install is logged with `console.warn` and does not
 *    abort preparation.
 * 3. Delegate to `prepareSandboxManagedRuntime`, always excluding
 *    `.paperclip-runtime` from the workspace.
 *
 * @returns Whatever `prepareSandboxManagedRuntime` resolves with.
 * @throws Propagates rejections from `input.runner.execute` and from
 *   `prepareSandboxManagedRuntime`.
 */
export async function prepareCommandManagedRuntime(input: {
  runner: CommandManagedRuntimeRunner;
  spec: CommandManagedRuntimeSpec;
  adapterKey: string;
  workspaceLocalDir: string;
  workspaceRemoteDir?: string;
  workspaceExclude?: string[];
  preserveAbsentOnRestore?: string[];
  assets?: CommandManagedRuntimeAsset[];
  installCommand?: string | null;
  /** When provided alongside `installCommand`, skip the install if `command -v <detectCommand>` succeeds. */
  detectCommand?: string | null;
}): Promise<PreparedSandboxManagedRuntime> {
  const timeoutMs = input.spec.timeoutMs && input.spec.timeoutMs > 0 ? input.spec.timeoutMs : 300_000;
  const workspaceRemoteDir = input.workspaceRemoteDir ?? input.spec.remoteCwd;
  const runtimeSpec: SandboxRemoteExecutionSpec = {
    transport: "sandbox",
    provider: input.spec.providerKey ?? "sandbox",
    sandboxId: input.spec.leaseId ?? "managed",
    remoteCwd: workspaceRemoteDir,
    timeoutMs,
    apiKey: null,
  };
  const client = createCommandManagedRuntimeClient({
    runner: input.runner,
    remoteCwd: workspaceRemoteDir,
    timeoutMs,
    shellCommand: input.spec.shellCommand,
  });
  const shellCommand = preferredShellForSandbox(input.spec.shellCommand);

  // Shared shape for the probe and the install: a login shell in the workspace directory.
  const runInShell = (script: string) =>
    input.runner.execute({
      command: shellCommand,
      args: ["-lc", script],
      cwd: workspaceRemoteDir,
      timeoutMs,
    });

  const installCommand = input.installCommand?.trim();
  if (installCommand) {
    const detectCommand = input.detectCommand?.trim();

    // Skip the install when the binary is already on PATH. Without this probe
    // the install runs unconditionally on every execute() call (and also runs
    // a second time after `ensureAdapterExecutionTargetCommandResolvable` has
    // already installed it during the resolvability gate).
    let alreadyInstalled = false;
    if (detectCommand) {
      const probe = await runInShell(`command -v ${shellQuote(detectCommand)} >/dev/null 2>&1`);
      alreadyInstalled = !probe.timedOut && probe.exitCode === 0;
    }

    if (!alreadyInstalled) {
      const result = await runInShell(installCommand);
      // A failed install is not always fatal: the CLI may already be on PATH
      // from a previous lease, the template image, or another path entry. Log
      // and continue rather than aborting the agent run; downstream code that
      // exec's the CLI will surface a clear "command not found" if it is in
      // fact missing. The test path's `maybeRunSandboxInstallCommand` already
      // honors this contract — keep them consistent.
      //
      // A null exit code (e.g. the process was killed by a signal) counts as a
      // failure here, matching the probe above and `requireSuccessfulResult`.
      if (result.timedOut || result.exitCode !== 0) {
        const tail = (text: string) =>
          text
            .split(/\r?\n/)
            .filter((line) => line.trim().length > 0)
            .slice(-3)
            .join(" | ")
            .slice(0, 480);
        const reason = result.timedOut ? "timed out" : `exited ${result.exitCode ?? "?"}`;
        console.warn(
          `[paperclip] managed-runtime install command ${reason}: ${installCommand} :: ${tail(result.stderr || result.stdout)}`,
        );
      }
    }
  }

  return await prepareSandboxManagedRuntime({
    spec: runtimeSpec,
    client,
    adapterKey: input.adapterKey,
    workspaceLocalDir: input.workspaceLocalDir,
    workspaceRemoteDir,
    workspaceExclude: mergeRuntimeExcludes(input.workspaceExclude),
    preserveAbsentOnRestore: input.preserveAbsentOnRestore,
    assets: input.assets,
  });
}
|