mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-18 03:30:39 +09:00
> **Stacked PR.** Sits on top of the e2b sandbox chain — #5278 (stdin staging) and #5279 (honest-resolvability + login-profiles). The cumulative diff against `master` includes both of those PRs' content; the files touched by *this* PR's commit are the new `maybeRunSandboxInstallCommand` helper in `packages/adapter-utils/src/execution-target.ts` and the per-adapter `index.ts`/`server/test.ts`/`server/execute.ts` wiring under `packages/adapters/{claude,codex,cursor,gemini,opencode,pi}-local/`. The honest resolvability check from #5279 is what gives this PR's install command a meaningful "did it actually land on PATH" follow-up. ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Sandbox execution targets are ephemeral — each fresh lease starts from a template image that may or may not have the agent CLIs preinstalled > - When a CLI isn't preinstalled, the resolvability probe fails at `command -v` and the hello probe never runs > - There's no shared mechanism for "before you probe or provision, install the CLI on this sandbox" > - This pull request adds a `SANDBOX_INSTALL_COMMAND` constant per adapter and a `maybeRunSandboxInstallCommand` helper that runs it via the existing sandbox login shell, captures structured output, and never throws (so the resolvability + hello probe still run after); each adapter's `test()` and `execute()` share the constant so the two callsites can't drift > - The benefit is a fresh sandbox lease without a preinstalled CLI now installs it once via `sh -lc` before the resolvability probe and before managed-runtime provisioning, with a uniform `<adapter>_install_command_run` check on the test report ## What Changed - `packages/adapter-utils/src/execution-target.ts`: add `AdapterSandboxInstallCommandCheck` and `maybeRunSandboxInstallCommand` (runs the install via existing sandbox shell, captures exit/stdout/stderr, returns a structured info/warn check, never throws) - Add `SANDBOX_INSTALL_COMMAND` to each 
adapter's `index.ts` so `test()` and `execute()` share a single source of truth - Wire each of the 6 affected adapter `testEnvironment()`s to call `maybeRunSandboxInstallCommand` before `ensureAdapterExecutionTargetCommandResolvable` - Pass `installCommand: SANDBOX_INSTALL_COMMAND` through `prepareAdapterExecutionTargetRuntime` in each adapter's `execute()` - Per-adapter install commands use npm globals where possible so binaries land on a PATH segment the template already exports: - claude → `npm install -g @anthropic-ai/claude-code` - codex → `npm install -g @openai/codex` - cursor → `curl https://cursor.com/install -fsS | bash` - gemini → `npm install -g @google/gemini-cli` - opencode → `npm install -g opencode-ai` - pi → `npm install -g @mariozechner/pi-coding-agent` SSH and local targets ignore `installCommand` (SSH runtime takes no such param; local short-circuits before runtime prep), so this is a no-op for non-sandbox environments. ## Verification - `pnpm typecheck` clean - `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils` and per-adapter projects pass - Manual sandbox matrix (claude, codex, cursor, gemini, opencode, pi) — each goes `install_command_run → resolvable → hello_probe_passed` (Codex and Pi land on `hello_probe_auth_required`, which is the configured-credentials problem, not an install issue) - SSH no-regression: SSH Claude still passes; the helper short-circuits on non-sandbox targets ## Risks Medium — adds a network/CPU cost (npm install / curl) on every fresh sandbox lease. Cost is bounded (one-time per lease, typically tens of seconds for npm globals), and the helper never throws so a failing install still lets the report run resolvability and hello probes. If a sandbox image already has the CLI, the install is an idempotent reinstall. 
## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
289 lines
11 KiB
TypeScript
289 lines
11 KiB
TypeScript
import type {
|
|
AdapterEnvironmentCheck,
|
|
AdapterEnvironmentTestContext,
|
|
AdapterEnvironmentTestResult,
|
|
} from "@paperclipai/adapter-utils";
|
|
import {
|
|
asString,
|
|
parseObject,
|
|
ensurePathInEnv,
|
|
} from "@paperclipai/adapter-utils/server-utils";
|
|
import {
|
|
ensureAdapterExecutionTargetCommandResolvable,
|
|
ensureAdapterExecutionTargetDirectory,
|
|
maybeRunSandboxInstallCommand,
|
|
runAdapterExecutionTargetProcess,
|
|
describeAdapterExecutionTarget,
|
|
resolveAdapterExecutionTargetCwd,
|
|
} from "@paperclipai/adapter-utils/execution-target";
|
|
import path from "node:path";
|
|
import os from "node:os";
|
|
import { parseCodexJsonl } from "./parse.js";
|
|
import { SANDBOX_INSTALL_COMMAND } from "../index.js";
|
|
import { codexHomeDir, readCodexAuthInfo } from "./quota.js";
|
|
import { buildCodexExecArgs } from "./codex-args.js";
|
|
|
|
/**
 * Collapses the severity of every check into a single overall status.
 *
 * Precedence is `error` > `warn` > anything else: one `error` check yields
 * `"fail"`, otherwise one `warn` check yields `"warn"`, and an empty list or a
 * list containing neither level yields `"pass"`.
 *
 * @param checks - Checks collected so far; the array is not modified.
 * @returns The overall status for the environment test report.
 */
function summarizeStatus(checks: AdapterEnvironmentCheck[]): AdapterEnvironmentTestResult["status"] {
  if (checks.some((check) => check.level === "error")) return "fail";
  if (checks.some((check) => check.level === "warn")) return "warn";
  return "pass";
}
|
|
|
|
/**
 * Type guard for "a string that contains something other than whitespace".
 *
 * @param value - Any value, typically an optional env/config entry.
 * @returns `true` only when `value` is a string whose trimmed length is > 0.
 */
function isNonEmpty(value: unknown): value is string {
  return typeof value === "string" && value.trim().length > 0;
}
|
|
|
|
/**
 * Returns the first line of `text` that is non-empty after trimming.
 *
 * Lines are separated by `\n` or `\r\n`. The returned line is trimmed.
 *
 * @param text - Multi-line text such as captured stdout/stderr.
 * @returns The first trimmed, non-empty line, or `""` when there is none.
 */
function firstNonEmptyLine(text: string): string {
  for (const rawLine of text.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line) return line;
  }
  return "";
}
|
|
|
|
/**
 * Reports whether `command` refers to the `expected` CLI binary, judged by
 * its file name alone.
 *
 * The comparison is case-insensitive and also accepts the Windows launcher
 * suffixes `.cmd` and `.exe`.
 *
 * @param command - Bare command name or a path to it (POSIX or Windows style).
 * @param expected - Binary name to look for, e.g. `"codex"`.
 * @returns `true` when the basename is `expected`, `expected.cmd` or
 *   `expected.exe`.
 */
function commandLooksLike(command: string, expected: string): boolean {
  // `path.basename` follows the host platform, so on a POSIX host a path such
  // as `C:\tools\codex.exe` would not be split at all. The win32 flavour
  // treats both `/` and `\` as separators, making the result host-independent.
  const base = path.win32.basename(command).toLowerCase();
  const wanted = expected.toLowerCase();
  return base === wanted || base === `${wanted}.cmd` || base === `${wanted}.exe`;
}
|
|
|
|
/**
 * Builds a short, single-line detail string describing a probe outcome.
 *
 * The source text is chosen in priority order: the parsed error message, then
 * the first non-empty stderr line, then the first non-empty stdout line.
 * Whitespace runs are collapsed to single spaces and the result is capped at
 * 240 UTF-16 code units, ending in `…` when truncated.
 *
 * @param stdout - Captured standard output of the probe.
 * @param stderr - Captured standard error of the probe.
 * @param parsedError - Error message extracted from the output, if any.
 * @returns The summarized detail, or `null` when no source has any text.
 */
function summarizeProbeDetail(stdout: string, stderr: string, parsedError: string | null): string | null {
  const raw = parsedError?.trim() || firstNonEmptyLine(stderr) || firstNonEmptyLine(stdout);
  if (!raw) return null;

  const clean = raw.replace(/\s+/g, " ").trim();
  const max = 240;
  if (clean.length <= max) return clean;

  let head = clean.slice(0, max - 1);
  // Cutting at a fixed offset can split a surrogate pair (e.g. an emoji);
  // drop a dangling high surrogate so the output stays well-formed.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) head = head.slice(0, -1);
  return `${head}…`;
}
|
|
|
|
/**
 * Case-insensitive pattern for probe output that points at missing or
 * rejected credentials: "not logged in", "login/authentication required",
 * "unauthorized", invalid/missing API key wording, any mention of
 * `OPENAI_API_KEY`, or a prompt to run `codex login`.
 *
 * It carries no `g` flag, so `.test()` keeps no state between calls.
 */
const CODEX_AUTH_REQUIRED_RE =
  /(?:not\s+logged\s+in|login\s+required|authentication\s+required|unauthorized|invalid(?:\s+or\s+missing)?\s+api(?:[_\s-]?key)?|openai[_\s-]?api[_\s-]?key|api[_\s-]?key.*required|please\s+run\s+`?codex\s+login`?)/i;
|
|
|
|
/**
 * Runs the Codex adapter's environment self-test and returns a report.
 *
 * Checks are appended in this order:
 *  1. a banner naming the target (remote targets only),
 *  2. working-directory validation (created if missing),
 *  3. the result of `maybeRunSandboxInstallCommand`, when it returns one,
 *  4. whether the configured command resolves on the target,
 *  5. authentication hints (`OPENAI_API_KEY` or, locally, Codex's auth file),
 *  6. a live "Respond with hello." probe, skipped when the working directory
 *     or command check failed or when the command is not `codex`.
 *
 * Failures of steps 2 and 4 are recorded as `error` checks instead of being
 * thrown. The probe call itself is not wrapped, so a rejection from
 * `runAdapterExecutionTargetProcess` propagates to the caller.
 *
 * @param ctx - Adapter config plus optional execution target and environment
 *   name.
 * @returns The adapter type, the overall status derived from the check
 *   levels, the ordered checks, and an ISO-8601 timestamp.
 */
export async function testEnvironment(
  ctx: AdapterEnvironmentTestContext,
): Promise<AdapterEnvironmentTestResult> {
  const checks: AdapterEnvironmentCheck[] = [];
  const config = parseObject(ctx.config);
  const command = asString(config.command, "codex");
  const target = ctx.executionTarget ?? null;
  const targetIsRemote = target?.kind === "remote";
  const cwd = resolveAdapterExecutionTargetCwd(target, asString(config.cwd, ""), process.cwd());
  const targetLabel = targetIsRemote
    ? ctx.environmentName ?? describeAdapterExecutionTarget(target)
    : null;
  // Unique per invocation; also used to name the temporary probe CODEX_HOME.
  const runId = `codex-envtest-${Date.now()}-${Math.random().toString(16).slice(2)}`;

  if (targetLabel) {
    checks.push({
      code: "codex_environment_target",
      level: "info",
      message: `Probing inside environment: ${targetLabel}`,
    });
  }

  try {
    await ensureAdapterExecutionTargetDirectory(runId, target, cwd, {
      cwd,
      env: {},
      createIfMissing: true,
    });
    checks.push({
      code: "codex_cwd_valid",
      level: "info",
      message: `Working directory is valid: ${cwd}`,
    });
  } catch (err) {
    checks.push({
      code: "codex_cwd_invalid",
      level: "error",
      message: err instanceof Error ? err.message : "Invalid working directory",
      detail: cwd,
    });
  }

  // Only string-valued entries of the configured env are forwarded.
  const envConfig = parseObject(config.env);
  const env: Record<string, string> = {};
  for (const [key, value] of Object.entries(envConfig)) {
    if (typeof value === "string") env[key] = value;
  }
  const runtimeEnv = ensurePathInEnv({ ...process.env, ...env });

  // Runs before the resolvability check so a freshly installed CLI can be
  // found by it. A null/undefined result adds no check.
  const installCheck = await maybeRunSandboxInstallCommand({
    runId,
    target,
    adapterKey: "codex",
    installCommand: SANDBOX_INSTALL_COMMAND,
    detectCommand: command,
    env,
  });
  if (installCheck) checks.push(installCheck);

  try {
    await ensureAdapterExecutionTargetCommandResolvable(command, target, cwd, runtimeEnv);
    checks.push({
      code: "codex_command_resolvable",
      level: "info",
      message: `Command is executable: ${command}`,
    });
  } catch (err) {
    checks.push({
      code: "codex_command_unresolvable",
      level: "error",
      message: err instanceof Error ? err.message : "Command is not executable",
      detail: command,
    });
  }

  const configOpenAiKey = env.OPENAI_API_KEY;
  // The server's own key is only meaningful when the probe runs locally.
  const hostOpenAiKey = targetIsRemote ? undefined : process.env.OPENAI_API_KEY;
  if (isNonEmpty(configOpenAiKey) || isNonEmpty(hostOpenAiKey)) {
    const source = isNonEmpty(configOpenAiKey) ? "adapter config env" : "server environment";
    checks.push({
      code: "codex_openai_api_key_present",
      level: "info",
      message: "OPENAI_API_KEY is set for Codex authentication.",
      detail: `Detected in ${source}.`,
    });
  } else if (!targetIsRemote) {
    // Local-only auth file check. On remote targets, the probe will surface
    // any missing-auth errors directly from the remote `codex` invocation.
    const codexHome = isNonEmpty(env.CODEX_HOME) ? env.CODEX_HOME : undefined;
    const codexAuth = await readCodexAuthInfo(codexHome).catch(() => null);
    if (codexAuth) {
      checks.push({
        code: "codex_native_auth_present",
        level: "info",
        message: "Codex is authenticated via its own auth configuration.",
        detail: codexAuth.email
          ? `Logged in as ${codexAuth.email}.`
          : `Credentials found in ${path.join(codexHome ?? codexHomeDir(), "auth.json")}.`,
      });
    } else {
      checks.push({
        code: "codex_openai_api_key_missing",
        level: "warn",
        message: "OPENAI_API_KEY is not set. Codex runs may fail until authentication is configured.",
        // Uses `codex login`, the same command the other hints and the
        // auth-required pattern in this file refer to.
        hint: "Set OPENAI_API_KEY in adapter env, shell environment, or run `codex login` to log in.",
      });
    }
  }

  const canRunProbe =
    checks.every((check) => check.code !== "codex_cwd_invalid" && check.code !== "codex_command_unresolvable");
  if (canRunProbe) {
    if (!commandLooksLike(command, "codex")) {
      checks.push({
        code: "codex_hello_probe_skipped_custom_command",
        level: "info",
        message: "Skipped hello probe because command is not `codex`.",
        detail: command,
        hint: "Use the `codex` CLI command to run the automatic login and installation probe.",
      });
    } else {
      // The probe always requests args with fast mode turned off.
      const execArgs = buildCodexExecArgs({ ...config, fastMode: false });
      const args = execArgs.args;
      if (execArgs.fastModeIgnoredReason) {
        checks.push({
          code: "codex_fast_mode_unsupported_model",
          level: "warn",
          message: execArgs.fastModeIgnoredReason,
          hint: "Switch the agent model to GPT-5.4 or enter a manual model ID to enable Codex Fast mode.",
        });
      }

      // Codex CLI (>= 0.122) ignores the OPENAI_API_KEY env var and only reads
      // credentials from $CODEX_HOME/auth.json. When we have a key available,
      // wrap the probe with a shell that materializes a per-run auth.json so
      // the CLI can authenticate. The key content is passed via env (not on
      // the command line) to avoid leaking it into process listings.
      const probeApiKey = isNonEmpty(configOpenAiKey)
        ? configOpenAiKey
        : isNonEmpty(hostOpenAiKey)
          ? hostOpenAiKey
          : null;
      let probeCommand = command;
      let probeArgs = args;
      const probeEnv: Record<string, string> = { ...env };
      if (probeApiKey) {
        const probeHome = targetIsRemote
          ? `/tmp/paperclip-codex-probe-${runId}`
          : path.join(os.tmpdir(), `paperclip-codex-probe-${runId}`);
        probeEnv.CODEX_HOME = probeHome;
        probeEnv._PAPERCLIP_CODEX_AUTH_JSON = JSON.stringify({ OPENAI_API_KEY: probeApiKey });
        probeCommand = "sh";
        // Trap on EXIT removes the probe home (with the API-key auth.json) on
        // any exit path; we drop `exec` so the wrapper shell stays alive long
        // enough for the trap to fire after the child returns.
        // The real command and its args are passed as "$0" "$@".
        probeArgs = [
          "-c",
          'set -e; mkdir -p "$CODEX_HOME"; umask 077; printf "%s" "$_PAPERCLIP_CODEX_AUTH_JSON" > "$CODEX_HOME/auth.json"; unset _PAPERCLIP_CODEX_AUTH_JSON; trap \'rm -rf "$CODEX_HOME"\' EXIT INT TERM; "$0" "$@"',
          command,
          ...args,
        ];
      }

      const probe = await runAdapterExecutionTargetProcess(
        runId,
        target,
        probeCommand,
        probeArgs,
        {
          cwd,
          env: probeEnv,
          timeoutSec: 45,
          graceSec: 5,
          stdin: "Respond with hello.",
          onLog: async () => {},
        },
      );
      const parsed = parseCodexJsonl(probe.stdout);
      const detail = summarizeProbeDetail(probe.stdout, probe.stderr, parsed.errorMessage);
      // Everything the auth pattern is allowed to look at.
      const authEvidence = `${parsed.errorMessage ?? ""}\n${probe.stdout}\n${probe.stderr}`.trim();

      if (probe.timedOut) {
        checks.push({
          code: "codex_hello_probe_timed_out",
          level: "warn",
          message: "Codex hello probe timed out.",
          hint: "Retry the probe. If this persists, verify Codex can run `Respond with hello` from this directory manually.",
        });
      } else if ((probe.exitCode ?? 1) === 0) {
        // A missing exit code is treated as a failure, not a success.
        const summary = parsed.summary.trim();
        const hasHello = /\bhello\b/i.test(summary);
        checks.push({
          code: hasHello ? "codex_hello_probe_passed" : "codex_hello_probe_unexpected_output",
          level: hasHello ? "info" : "warn",
          message: hasHello
            ? "Codex hello probe succeeded."
            : "Codex probe ran but did not return `hello` as expected.",
          ...(summary ? { detail: summary.replace(/\s+/g, " ").trim().slice(0, 240) } : {}),
          ...(hasHello
            ? {}
            : {
                hint: "Try the probe manually (`codex exec --json -` then prompt: Respond with hello) to inspect full output.",
              }),
        });
      } else if (CODEX_AUTH_REQUIRED_RE.test(authEvidence)) {
        checks.push({
          code: "codex_hello_probe_auth_required",
          level: "warn",
          message: "Codex CLI is installed, but authentication is not ready.",
          ...(detail ? { detail } : {}),
          hint: probeApiKey
            ? "OPENAI_API_KEY was provided but Codex still rejected the request. Verify the key is valid for the OpenAI Responses API (e.g. `curl -H \"Authorization: Bearer $OPENAI_API_KEY\" https://api.openai.com/v1/models`), or run `codex login` and seed `~/.codex/auth.json`."
            : "Codex CLI does not read OPENAI_API_KEY from the environment; set OPENAI_API_KEY in this adapter's config (so Paperclip writes it to `$CODEX_HOME/auth.json`) or run `codex login` on the host first.",
        });
      } else {
        checks.push({
          code: "codex_hello_probe_failed",
          level: "error",
          message: "Codex hello probe failed.",
          ...(detail ? { detail } : {}),
          hint: "Run `codex exec --json -` manually in this working directory and prompt `Respond with hello` to debug.",
        });
      }
    }
  }

  return {
    adapterType: ctx.adapterType,
    status: summarizeStatus(checks),
    checks,
    testedAt: new Date().toISOString(),
  };
}
|