Stabilize runtime probes and Codex env tests (#5445)

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - Adapters expose a Test action that probes the configured runtime — install, resolvability, hello — to give operators a fast yes/no on whether an environment is healthy
> - The Codex test path was running its hello probe directly without going through the managed-runtime preparation that production runs use, so a healthy production setup could still report a probe failure
> - The plugin worker manager wasn't surfacing terminated workers cleanly, leaving the runtime probe waiting on a dead worker until the request timed out
> - This pull request routes the Codex test probe through `prepareAdapterExecutionTargetRuntime` (so it sees the same managed Codex home production sees), exposes `commandCwd` on `createCommandManagedRuntimeClient` so callers can target a per-probe directory without leaking the workspace `remoteCwd`, and propagates plugin-worker termination as a usable error instead of a hang
> - The benefit is the Codex Test action mirrors production behavior end-to-end, and probes against a terminated plugin worker fail fast instead of timing out

## What Changed

- `packages/adapter-utils/src/command-managed-runtime.ts`: rename the `remoteCwd` knob to `commandCwd` so callers can target a per-probe directory without inheriting the workspace cwd; matching test coverage in `command-managed-runtime.test.ts`
- `packages/adapter-utils/src/sandbox-callback-bridge.{ts,test.ts}`: small fixes to keep callback bridge stop semantics deterministic
- `packages/adapters/codex-local/src/server/test.ts`: thread the Codex hello probe through `prepareAdapterExecutionTargetRuntime` + `prepareManagedCodexHome` so the probe sees the same managed home production sees; new `test.remote.test.ts` covers the remote probe path
- `packages/adapters/cursor-local/src/server/execute.ts`: small probe-side cleanup that aligns with the new `commandCwd` contract
- `server/src/services/plugin-worker-manager.ts`: surface plugin-worker termination as a structured error so callers fail fast; new `plugin-worker-terminated.cjs` fixture and `plugin-worker-manager.test.ts` cases pin the behavior

## Verification

- `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils --project @paperclipai/adapter-codex-local --project @paperclipai/adapter-cursor-local --project @paperclipai/server` — 1749/1750 passing (1 unrelated skip)
- `pnpm typecheck` clean

## Risks

Low–medium. The `remoteCwd` → `commandCwd` change is a parameter rename on an internal helper used only by adapter test/execute paths in this repo. The plugin-worker-terminated path was previously a hang; failing fast may surface latent timeouts as explicit termination errors in callers that already expected them.

## Model Used

Claude Opus 4.7 (1M context)

## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable — new tests cover `commandCwd`, plugin-worker termination, and Codex remote test path
- [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before requesting merge

---

> **Stacked PR.** Sits on top of #5444 which adds the per-run runtime API surface this PR builds on. Cumulative diff against `master` includes that PR's content; the files touched by *this* PR's commit are listed under "What Changed" above. Will rebase onto `master` and force-push once #5444 merges.
2026-06-17 03:10:38 +09:00 · 2026-05-07 14:52:31 -07:00 · 2026-05-07 14:52:31 -07:00 · fe3904f434
commit fe3904f434
parent 12cb7b40fd
12 changed files with 639 additions and 90 deletions
--- a/packages/adapters/codex-local/src/server/test.ts
+++ b/packages/adapters/codex-local/src/server/test.ts
@@ -15,13 +15,16 @@ import {
  runAdapterExecutionTargetProcess,
  describeAdapterExecutionTarget,
  resolveAdapterExecutionTargetCwd,
+  prepareAdapterExecutionTargetRuntime,
 } from "@paperclipai/adapter-utils/execution-target";
+import fs from "node:fs/promises";
 import path from "node:path";
 import os from "node:os";
 import { parseCodexJsonl } from "./parse.js";
 import { SANDBOX_INSTALL_COMMAND } from "../index.js";
 import { codexHomeDir, readCodexAuthInfo } from "./quota.js";
 import { buildCodexExecArgs } from "./codex-args.js";
+import { prepareManagedCodexHome } from "./codex-home.js";

 function summarizeStatus(checks: AdapterEnvironmentCheck[]): AdapterEnvironmentTestResult["status"] {
  if (checks.some((check) => check.level === "error")) return "fail";
@@ -58,6 +61,99 @@ function summarizeProbeDetail(stdout: string, stderr: string, parsedError: strin
 const CODEX_AUTH_REQUIRED_RE =
  /(?:not\s+logged\s+in|login\s+required|authentication\s+required|unauthorized|invalid(?:\s+or\s+missing)?\s+api(?:[_\s-]?key)?|openai[_\s-]?api[_\s-]?key|api[_\s-]?key.*required|please\s+run\s+`?codex\s+login`?)/i;

+async function prepareCodexHelloProbe(input: {
+  runId: string;
+  companyId: string;
+  target: AdapterEnvironmentTestContext["executionTarget"] | null;
+  targetIsRemote: boolean;
+  cwd: string;
+  command: string;
+  args: string[];
+  env: Record<string, string>;
+  probeApiKey: string | null;
+}): Promise<{
+  command: string;
+  args: string[];
+  env: Record<string, string>;
+  cleanup: () => Promise<void>;
+}> {
+  let preparedRuntime: Awaited<ReturnType<typeof prepareAdapterExecutionTargetRuntime>> | null = null;
+  let preparedRuntimeWorkspaceLocalDir: string | null = null;
+
+  const cleanup = async () => {
+    await preparedRuntime?.restoreWorkspace().catch(() => {});
+    if (preparedRuntimeWorkspaceLocalDir) {
+      await fs.rm(preparedRuntimeWorkspaceLocalDir, { recursive: true, force: true }).catch(() => {});
+    }
+  };
+
+  if (input.targetIsRemote && !input.probeApiKey) {
+    const managedHome = await prepareManagedCodexHome(process.env, async () => {}, input.companyId, {
+      apiKey: null,
+    });
+    preparedRuntimeWorkspaceLocalDir = await fs.mkdtemp(
+      path.join(os.tmpdir(), `paperclip-codex-envtest-${input.runId}-`),
+    );
+    preparedRuntime = await prepareAdapterExecutionTargetRuntime({
+      runId: input.runId,
+      target: input.target,
+      adapterKey: "codex",
+      workspaceLocalDir: preparedRuntimeWorkspaceLocalDir,
+      // Pass `input.cwd` as the base (not a pre-built per-run subdir).
+      // `prepareRemoteManagedRuntime` itself appends
+      // `.paperclip-runtime/runs/<runId>/workspace` to whatever it gets, so
+      // pre-building a per-run path here would double-nest the run ID.
+      workspaceRemoteDir: input.cwd,
+      installCommand: SANDBOX_INSTALL_COMMAND,
+      detectCommand: input.command,
+      assets: [
+        {
+          key: "home",
+          localDir: managedHome,
+          followSymlinks: true,
+        },
+      ],
+    });
+
+    return {
+      command: input.command,
+      args: input.args,
+      env: preparedRuntime.assetDirs.home
+        ? { ...input.env, CODEX_HOME: preparedRuntime.assetDirs.home }
+        : { ...input.env },
+      cleanup,
+    };
+  }
+
+  if (input.probeApiKey) {
+    const probeHome = input.targetIsRemote
+      ? `/tmp/paperclip-codex-probe-${input.runId}`
+      : path.join(os.tmpdir(), `paperclip-codex-probe-${input.runId}`);
+    return {
+      command: "sh",
+      args: [
+        "-c",
+        'set -e; mkdir -p "$CODEX_HOME"; umask 077; printf "%s" "$_PAPERCLIP_CODEX_AUTH_JSON" > "$CODEX_HOME/auth.json"; unset _PAPERCLIP_CODEX_AUTH_JSON; trap \'rm -rf "$CODEX_HOME"\' EXIT INT TERM; "$0" "$@"',
+        input.command,
+        ...input.args,
+      ],
+      env: {
+        ...input.env,
+        CODEX_HOME: probeHome,
+        _PAPERCLIP_CODEX_AUTH_JSON: JSON.stringify({ OPENAI_API_KEY: input.probeApiKey }),
+      },
+      cleanup,
+    };
+  }
+
+  return {
+    command: input.command,
+    args: input.args,
+    env: { ...input.env },
+    cleanup,
+  };
+}
+
 export async function testEnvironment(
  ctx: AdapterEnvironmentTestContext,
 ): Promise<AdapterEnvironmentTestResult> {
@@ -196,86 +292,80 @@ export async function testEnvironment(
        : isNonEmpty(hostOpenAiKey)
          ? hostOpenAiKey
          : null;
-      let probeCommand = command;
-      let probeArgs = args;
-      const probeEnv: Record<string, string> = { ...env };
-      if (probeApiKey) {
-        const probeHome = targetIsRemote
-          ? `/tmp/paperclip-codex-probe-${runId}`
-          : path.join(os.tmpdir(), `paperclip-codex-probe-${runId}`);
-        probeEnv.CODEX_HOME = probeHome;
-        probeEnv._PAPERCLIP_CODEX_AUTH_JSON = JSON.stringify({ OPENAI_API_KEY: probeApiKey });
-        probeCommand = "sh";
-        // Trap on EXIT removes the probe home (with the API-key auth.json) on
-        // any exit path; we drop `exec` so the wrapper shell stays alive long
-        // enough for the trap to fire after the child returns.
-        probeArgs = [
-          "-c",
-          'set -e; mkdir -p "$CODEX_HOME"; umask 077; printf "%s" "$_PAPERCLIP_CODEX_AUTH_JSON" > "$CODEX_HOME/auth.json"; unset _PAPERCLIP_CODEX_AUTH_JSON; trap \'rm -rf "$CODEX_HOME"\' EXIT INT TERM; "$0" "$@"',
-          command,
-          ...args,
-        ];
-      }
-
-      const probe = await runAdapterExecutionTargetProcess(
+      const preparedProbe = await prepareCodexHelloProbe({
        runId,
+        companyId: ctx.companyId,
        target,
-        probeCommand,
-        probeArgs,
-        {
-          cwd,
-          env: probeEnv,
-          timeoutSec: 45,
-          graceSec: 5,
-          stdin: "Respond with hello.",
-          onLog: async () => {},
-        },
-      );
-      const parsed = parseCodexJsonl(probe.stdout);
-      const detail = summarizeProbeDetail(probe.stdout, probe.stderr, parsed.errorMessage);
-      const authEvidence = `${parsed.errorMessage ?? ""}\n${probe.stdout}\n${probe.stderr}`.trim();
+        targetIsRemote,
+        cwd,
+        command,
+        args,
+        env,
+        probeApiKey,
+      });
+      try {
+        const probe = await runAdapterExecutionTargetProcess(
+          runId,
+          target,
+          preparedProbe.command,
+          preparedProbe.args,
+          {
+            cwd,
+            env: preparedProbe.env,
+            timeoutSec: 45,
+            graceSec: 5,
+            stdin: "Respond with hello.",
+            onLog: async () => {},
+          },
+        );
+        const parsed = parseCodexJsonl(probe.stdout);
+        const detail = summarizeProbeDetail(probe.stdout, probe.stderr, parsed.errorMessage);
+        const authEvidence = `${parsed.errorMessage ?? ""}\n${probe.stdout}\n${probe.stderr}`.trim();

-      if (probe.timedOut) {
-        checks.push({
-          code: "codex_hello_probe_timed_out",
-          level: "warn",
-          message: "Codex hello probe timed out.",
-          hint: "Retry the probe. If this persists, verify Codex can run `Respond with hello` from this directory manually.",
-        });
-      } else if ((probe.exitCode ?? 1) === 0) {
-        const summary = parsed.summary.trim();
-        const hasHello = /\bhello\b/i.test(summary);
-        checks.push({
-          code: hasHello ? "codex_hello_probe_passed" : "codex_hello_probe_unexpected_output",
-          level: hasHello ? "info" : "warn",
-          message: hasHello
-            ? "Codex hello probe succeeded."
-            : "Codex probe ran but did not return `hello` as expected.",
-          ...(summary ? { detail: summary.replace(/\s+/g, " ").trim().slice(0, 240) } : {}),
-          ...(hasHello
-            ? {}
-            : {
-                hint: "Try the probe manually (`codex exec --json -` then prompt: Respond with hello) to inspect full output.",
-              }),
-        });
-      } else if (CODEX_AUTH_REQUIRED_RE.test(authEvidence)) {
-        checks.push({
-          code: "codex_hello_probe_auth_required",
-          level: "warn",
-          message: "Codex CLI is installed, but authentication is not ready.",
-          ...(detail ? { detail } : {}),
-          hint: probeApiKey
-            ? "OPENAI_API_KEY was provided but Codex still rejected the request. Verify the key is valid for the OpenAI Responses API (e.g. `curl -H \"Authorization: Bearer $OPENAI_API_KEY\" https://api.openai.com/v1/models`), or run `codex login` and seed `~/.codex/auth.json`."
-            : "Codex CLI does not read OPENAI_API_KEY from the environment; set OPENAI_API_KEY in this adapter's config (so Paperclip writes it to `$CODEX_HOME/auth.json`) or run `codex login` on the host first.",
-        });
-      } else {
-        checks.push({
-          code: "codex_hello_probe_failed",
-          level: "error",
-          message: "Codex hello probe failed.",
-          ...(detail ? { detail } : {}),
-          hint: "Run `codex exec --json -` manually in this working directory and prompt `Respond with hello` to debug.",
-        });
+        if (probe.timedOut) {
+          checks.push({
+            code: "codex_hello_probe_timed_out",
+            level: "warn",
+            message: "Codex hello probe timed out.",
+            hint: "Retry the probe. If this persists, verify Codex can run `Respond with hello` from this directory manually.",
+          });
+        } else if ((probe.exitCode ?? 1) === 0) {
+          const summary = parsed.summary.trim();
+          const hasHello = /\bhello\b/i.test(summary);
+          checks.push({
+            code: hasHello ? "codex_hello_probe_passed" : "codex_hello_probe_unexpected_output",
+            level: hasHello ? "info" : "warn",
+            message: hasHello
+              ? "Codex hello probe succeeded."
+              : "Codex probe ran but did not return `hello` as expected.",
+            ...(summary ? { detail: summary.replace(/\s+/g, " ").trim().slice(0, 240) } : {}),
+            ...(hasHello
+              ? {}
+              : {
+                  hint: "Try the probe manually (`codex exec --json -` then prompt: Respond with hello) to inspect full output.",
+                }),
+          });
+        } else if (CODEX_AUTH_REQUIRED_RE.test(authEvidence)) {
+          checks.push({
+            code: "codex_hello_probe_auth_required",
+            level: "warn",
+            message: "Codex CLI is installed, but authentication is not ready.",
+            ...(detail ? { detail } : {}),
+            hint: probeApiKey
+              ? "OPENAI_API_KEY was provided but Codex still rejected the request. Verify the key is valid for the OpenAI Responses API (e.g. `curl -H \"Authorization: Bearer $OPENAI_API_KEY\" https://api.openai.com/v1/models`), or run `codex login` and seed `~/.codex/auth.json`."
+              : "Codex CLI does not read OPENAI_API_KEY from the environment; set OPENAI_API_KEY in this adapter's config (so Paperclip writes it to `$CODEX_HOME/auth.json`) or run `codex login` on the host first.",
+          });
+        } else {
+          checks.push({
+            code: "codex_hello_probe_failed",
+            level: "error",
+            message: "Codex hello probe failed.",
+            ...(detail ? { detail } : {}),
+            hint: "Run `codex exec --json -` manually in this working directory and prompt `Respond with hello` to debug.",
+          });
+        }
+      } finally {
+        await preparedProbe.cleanup();
      }
    }
  }