Stabilize runtime probes and Codex env tests (#5445)

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - Adapters expose a Test action that probes the configured runtime — install, resolvability, hello — to give operators a fast yes/no on whether an environment is healthy
> - The Codex test path was running its hello probe directly without going through the managed-runtime preparation that production runs use, so a healthy production setup could still report a probe failure
> - The plugin worker manager wasn't surfacing terminated workers cleanly, leaving the runtime probe waiting on a dead worker until the request timed out
> - This pull request routes the Codex test probe through `prepareAdapterExecutionTargetRuntime` (so it sees the same managed Codex home production sees), exposes `commandCwd` on `createCommandManagedRuntimeClient` so callers can target a per-probe directory without leaking the workspace `remoteCwd`, and propagates plugin-worker termination as a usable error instead of a hang
> - The benefit is the Codex Test action mirrors production behavior end-to-end, and probes against a terminated plugin worker fail fast instead of timing out

## What Changed

- `packages/adapter-utils/src/command-managed-runtime.ts`: rename the `remoteCwd` knob to `commandCwd` so callers can target a per-probe directory without inheriting the workspace cwd; matching test coverage in `command-managed-runtime.test.ts`
- `packages/adapter-utils/src/sandbox-callback-bridge.{ts,test.ts}`: small fixes to keep callback bridge stop semantics deterministic
- `packages/adapters/codex-local/src/server/test.ts`: thread the Codex hello probe through `prepareAdapterExecutionTargetRuntime` + `prepareManagedCodexHome` so the probe sees the same managed home production sees; new `test.remote.test.ts` covers the remote probe path
- `packages/adapters/cursor-local/src/server/execute.ts`: small probe-side cleanup that aligns with the new commandCwd contract
- `server/src/services/plugin-worker-manager.ts`: surface plugin-worker termination as a structured error so callers fail fast; new `plugin-worker-terminated.cjs` fixture and `plugin-worker-manager.test.ts` cases pin the behavior

## Verification

- `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils --project @paperclipai/adapter-codex-local --project @paperclipai/adapter-cursor-local --project @paperclipai/server` — 1749/1750 passing (1 unrelated skip)
- `pnpm typecheck` clean

## Risks

Low–medium. The `remoteCwd → commandCwd` rename is a parameter renaming on an internal helper used only by adapter test/execute paths in this repo. The plugin-worker-terminated path was previously a hang; failing fast may surface latent timeouts as explicit termination errors in callers that already expected them.

## Model Used

Claude Opus 4.7 (1M context)

## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable — new tests cover commandCwd, plugin-worker termination, and Codex remote test path
- [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before requesting merge

---

> **Stacked PR.** Sits on top of #5444 which adds the per-run runtime API surface this PR builds on. Cumulative diff against `master` includes that PR's content; the files touched by *this* PR's commit are listed under "What Changed" above. Will rebase onto `master` and force-push once #5444 merges.
2026-06-14 01:50:39 +09:00 · 2026-05-07 14:52:31 -07:00 · 2026-05-07 14:52:31 -07:00 · fe3904f434
commit fe3904f434
parent 12cb7b40fd
12 changed files with 639 additions and 90 deletions
--- a/packages/adapters/codex-local/src/server/test.remote.test.ts
+++ b/packages/adapters/codex-local/src/server/test.remote.test.ts
@ -0,0 +1,152 @@
+import fs from "node:fs/promises";
+import os from "node:os";
+import { afterEach, describe, expect, it, vi } from "vitest";
+import type { AdapterExecutionTarget } from "@paperclipai/adapter-utils/execution-target";
+
+// Mock functions are created inside `vi.hoisted` so that they already exist
+// when the `vi.mock` factories further down reference them by name.
+const {
+  ensureAdapterExecutionTargetDirectory,
+  ensureAdapterExecutionTargetCommandResolvable,
+  maybeRunSandboxInstallCommand,
+  runAdapterExecutionTargetProcess,
+  describeAdapterExecutionTarget,
+  resolveAdapterExecutionTargetCwd,
+  prepareAdapterExecutionTargetRuntime,
+  prepareManagedCodexHome,
+  restoreWorkspace,
+} = vi.hoisted(() => {
+  // Single shared spy: it is returned from the mocked runtime below and also
+  // exported so the test can assert that cleanup invoked it.
+  const restoreWorkspace = vi.fn(async () => {});
+  return {
+    ensureAdapterExecutionTargetDirectory: vi.fn(async () => {}),
+    ensureAdapterExecutionTargetCommandResolvable: vi.fn(async () => {}),
+    maybeRunSandboxInstallCommand: vi.fn(async () => null),
+    // Simulates a successful probe process: exit code 0 and newline-separated
+    // JSON events on stdout whose agent message text is "hello".
+    runAdapterExecutionTargetProcess: vi.fn(async () => ({
+      exitCode: 0,
+      signal: null,
+      timedOut: false,
+      stdout: [
+        "{\"type\":\"thread.started\",\"thread_id\":\"thread-1\"}",
+        "{\"type\":\"item.completed\",\"item\":{\"type\":\"agent_message\",\"text\":\"hello\"}}",
+        "{\"type\":\"turn.completed\",\"usage\":{\"input_tokens\":1,\"cached_input_tokens\":0,\"output_tokens\":1}}",
+      ].join("\n"),
+      stderr: "",
+      pid: 123,
+      startedAt: new Date().toISOString(),
+    })),
+    describeAdapterExecutionTarget: vi.fn(() => "QA SSH"),
+    // Resolution order used by this stub: a non-blank configured cwd, then the
+    // target's `remoteCwd` when it is a string, then the fallback cwd.
+    resolveAdapterExecutionTargetCwd: vi.fn((target, configuredCwd, fallbackCwd) => {
+      if (typeof configuredCwd === "string" && configuredCwd.trim().length > 0) return configuredCwd;
+      if (target && typeof target === "object" && "remoteCwd" in target && typeof target.remoteCwd === "string") {
+        return target.remoteCwd;
+      }
+      return fallbackCwd;
+    }),
+    // Returns a fixed prepared-runtime shape; `assetDirs.home` is the value the
+    // test later expects to see as the probe's CODEX_HOME.
+    prepareAdapterExecutionTargetRuntime: vi.fn(async () => ({
+      target: null,
+      workspaceRemoteDir: "/remote/workspace/.paperclip-runtime/runs/test/workspace",
+      runtimeRootDir: "/remote/workspace/.paperclip-runtime/runs/test/workspace/.paperclip-runtime/codex",
+      assetDirs: {
+        home: "/remote/workspace/.paperclip-runtime/runs/test/workspace/.paperclip-runtime/codex/home",
+      },
+      restoreWorkspace,
+    })),
+    prepareManagedCodexHome: vi.fn(async () => "/tmp/paperclip-managed-codex-home"),
+    restoreWorkspace,
+  };
+});
+
+// Partial mock: keep every real export of the execution-target module and
+// override only the helpers listed below with the hoisted mock functions.
+vi.mock("@paperclipai/adapter-utils/execution-target", async () => {
+  const actual = await vi.importActual<typeof import("@paperclipai/adapter-utils/execution-target")>(
+    "@paperclipai/adapter-utils/execution-target",
+  );
+  return {
+    ...actual,
+    ensureAdapterExecutionTargetDirectory,
+    ensureAdapterExecutionTargetCommandResolvable,
+    maybeRunSandboxInstallCommand,
+    runAdapterExecutionTargetProcess,
+    describeAdapterExecutionTarget,
+    resolveAdapterExecutionTargetCwd,
+    prepareAdapterExecutionTargetRuntime,
+  };
+});
+
+// Partial mock: keep the real `./codex-home.js` exports and replace only
+// `prepareManagedCodexHome` with the hoisted mock.
+vi.mock("./codex-home.js", async () => {
+  const actual = await vi.importActual<typeof import("./codex-home.js")>("./codex-home.js");
+  return {
+    ...actual,
+    prepareManagedCodexHome,
+  };
+});
+
+import { testEnvironment } from "./test.js";
+
+// Verifies the remote (SSH) Codex environment test: the managed Codex home is
+// staged through the mocked runtime preparation, the probe still runs in the
+// original remote workspace, and the temporary local directory is cleaned up.
+describe("codex remote environment diagnostics", () => {
+  afterEach(() => {
+    // Reset recorded calls so call-count assertions start from zero.
+    vi.clearAllMocks();
+  });
+
+  it("stages managed CODEX_HOME in an isolated runtime dir and keeps the probe cwd on the original remote workspace", async () => {
+    const remoteTarget: AdapterExecutionTarget = {
+      kind: "remote",
+      transport: "ssh",
+      remoteCwd: "/remote/workspace",
+      spec: {
+        host: "127.0.0.1",
+        port: 22,
+        username: "agent",
+        privateKey: "PRIVATE KEY",
+        knownHosts: "KNOWN HOSTS",
+        remoteCwd: "/remote/workspace",
+        remoteWorkspacePath: "/remote/workspace",
+        strictHostKeyChecking: false,
+      },
+    };
+
+    const result = await testEnvironment({
+      companyId: "company-1",
+      adapterType: "codex_local",
+      config: {
+        command: "codex",
+      },
+      executionTarget: remoteTarget,
+      environmentName: "QA SSH",
+    });
+
+    expect(result.status).toBe("pass");
+    expect(result.checks.some((check) => check.code === "codex_hello_probe_passed")).toBe(true);
+    expect(prepareManagedCodexHome).toHaveBeenCalledTimes(1);
+    expect(prepareAdapterExecutionTargetRuntime).toHaveBeenCalledTimes(1);
+    const runtimeCalls = prepareAdapterExecutionTargetRuntime.mock.calls as unknown as Array<[
+      {
+        workspaceLocalDir: string;
+        target?: { remoteCwd?: string };
+        workspaceRemoteDir?: string;
+      },
+    ]>;
+    const runtimeInput = runtimeCalls[0]?.[0];
+    // The implementation builds this path with `path.join`, so check the
+    // tmpdir prefix and the directory-name prefix separately instead of
+    // hard-coding a `/` separator between them.
+    expect(runtimeInput?.workspaceLocalDir.startsWith(os.tmpdir())).toBe(true);
+    expect(runtimeInput?.workspaceLocalDir).toContain("paperclip-codex-envtest-");
+    expect(runtimeInput?.workspaceLocalDir).not.toBe("/remote/workspace");
+    // The temporary local workspace must no longer exist once the test
+    // action has returned, i.e. probe cleanup removed it.
+    expect(await fs.stat(runtimeInput!.workspaceLocalDir).catch(() => null)).toBeNull();
+    expect(runtimeInput?.target?.remoteCwd).toBe("/remote/workspace");
+    // `workspaceRemoteDir` is the base path passed to the runtime; the
+    // helper's per-run subdirectory is appended internally inside
+    // `prepareRemoteManagedRuntime`. Pre-building a per-run prefix here
+    // would double-nest the run id in the final path.
+    expect(runtimeInput?.workspaceRemoteDir).toBe("/remote/workspace");
+    expect(runAdapterExecutionTargetProcess).toHaveBeenCalledTimes(1);
+    const probeCall = runAdapterExecutionTargetProcess.mock.calls[0] as unknown as
+      | [string, { kind: string; remoteCwd: string }, string, string[], { cwd: string; env: Record<string, string> }]
+      | undefined;
+    expect(probeCall?.[1]).toMatchObject({
+      kind: "remote",
+      remoteCwd: "/remote/workspace",
+    });
+    // The probe keeps running in the original remote workspace while
+    // CODEX_HOME points at the home asset directory of the prepared runtime.
+    expect(probeCall?.[4]).toMatchObject({
+      cwd: "/remote/workspace",
+      env: expect.objectContaining({
+        CODEX_HOME: "/remote/workspace/.paperclip-runtime/runs/test/workspace/.paperclip-runtime/codex/home",
+      }),
+    });
+    expect(restoreWorkspace).toHaveBeenCalledTimes(1);
+  });
+});
--- a/packages/adapters/codex-local/src/server/test.ts
+++ b/packages/adapters/codex-local/src/server/test.ts
@ -15,13 +15,16 @@ import {
  runAdapterExecutionTargetProcess,
  describeAdapterExecutionTarget,
  resolveAdapterExecutionTargetCwd,
+  prepareAdapterExecutionTargetRuntime,
 } from "@paperclipai/adapter-utils/execution-target";
+import fs from "node:fs/promises";
 import path from "node:path";
 import os from "node:os";
 import { parseCodexJsonl } from "./parse.js";
 import { SANDBOX_INSTALL_COMMAND } from "../index.js";
 import { codexHomeDir, readCodexAuthInfo } from "./quota.js";
 import { buildCodexExecArgs } from "./codex-args.js";
+import { prepareManagedCodexHome } from "./codex-home.js";

 function summarizeStatus(checks: AdapterEnvironmentCheck[]): AdapterEnvironmentTestResult["status"] {
  if (checks.some((check) => check.level === "error")) return "fail";
@ -58,6 +61,99 @@ function summarizeProbeDetail(stdout: string, stderr: string, parsedError: strin
 const CODEX_AUTH_REQUIRED_RE =
  /(?:not\s+logged\s+in|login\s+required|authentication\s+required|unauthorized|invalid(?:\s+or\s+missing)?\s+api(?:[_\s-]?key)?|openai[_\s-]?api[_\s-]?key|api[_\s-]?key.*required|please\s+run\s+`?codex\s+login`?)/i;

+/**
+ * Builds the command, arguments and environment for the Codex hello probe.
+ *
+ * Three cases are handled:
+ * - Remote target without an API key: a managed Codex home is prepared and
+ *   staged through `prepareAdapterExecutionTargetRuntime`; the probe's
+ *   `CODEX_HOME` is set to the staged `home` asset directory when one is
+ *   reported.
+ * - API key available (local or remote): the command is wrapped in `sh -c`
+ *   which writes a throwaway `auth.json` into a per-run `CODEX_HOME`.
+ * - Otherwise: command, args and env are passed through unchanged (env is
+ *   copied).
+ *
+ * @param input Probe inputs: run/company ids, the execution target, whether it
+ *   is remote, the resolved cwd, the base command/args/env, and the API key
+ *   to seed (or `null`).
+ * @returns The command, args and env to run, plus a `cleanup` callback the
+ *   caller must await after the probe finishes. `cleanup` is best-effort and
+ *   never rejects.
+ * @throws Re-throws any error raised while preparing the managed runtime;
+ *   resources created up to that point are released first.
+ */
+async function prepareCodexHelloProbe(input: {
+  runId: string;
+  companyId: string;
+  target: AdapterEnvironmentTestContext["executionTarget"] | null;
+  targetIsRemote: boolean;
+  cwd: string;
+  command: string;
+  args: string[];
+  env: Record<string, string>;
+  probeApiKey: string | null;
+}): Promise<{
+  command: string;
+  args: string[];
+  env: Record<string, string>;
+  cleanup: () => Promise<void>;
+}> {
+  let preparedRuntime: Awaited<ReturnType<typeof prepareAdapterExecutionTargetRuntime>> | null = null;
+  let preparedRuntimeWorkspaceLocalDir: string | null = null;
+
+  // Best-effort teardown: restore the prepared workspace (if any) and remove
+  // the temporary local directory (if one was created). Failures are ignored
+  // on purpose so cleanup can never mask the probe result.
+  const cleanup = async () => {
+    await preparedRuntime?.restoreWorkspace().catch(() => {});
+    if (preparedRuntimeWorkspaceLocalDir) {
+      await fs.rm(preparedRuntimeWorkspaceLocalDir, { recursive: true, force: true }).catch(() => {});
+    }
+  };
+
+  if (input.targetIsRemote && !input.probeApiKey) {
+    const managedHome = await prepareManagedCodexHome(process.env, async () => {}, input.companyId, {
+      apiKey: null,
+    });
+    try {
+      preparedRuntimeWorkspaceLocalDir = await fs.mkdtemp(
+        path.join(os.tmpdir(), `paperclip-codex-envtest-${input.runId}-`),
+      );
+      const runtime = await prepareAdapterExecutionTargetRuntime({
+        runId: input.runId,
+        target: input.target,
+        adapterKey: "codex",
+        workspaceLocalDir: preparedRuntimeWorkspaceLocalDir,
+        // Pass `input.cwd` as the base (not a pre-built per-run subdir).
+        // `prepareRemoteManagedRuntime` itself appends
+        // `.paperclip-runtime/runs/<runId>/workspace` to whatever it gets, so
+        // pre-building a per-run path here would double-nest the run ID.
+        workspaceRemoteDir: input.cwd,
+        installCommand: SANDBOX_INSTALL_COMMAND,
+        detectCommand: input.command,
+        assets: [
+          {
+            key: "home",
+            localDir: managedHome,
+            followSymlinks: true,
+          },
+        ],
+      });
+      preparedRuntime = runtime;
+
+      return {
+        command: input.command,
+        args: input.args,
+        env: runtime.assetDirs.home
+          ? { ...input.env, CODEX_HOME: runtime.assetDirs.home }
+          : { ...input.env },
+        cleanup,
+      };
+    } catch (err) {
+      // The caller only receives `cleanup` when this function returns, so a
+      // failure here would otherwise leak the temporary local directory.
+      await cleanup();
+      throw err;
+    }
+  }
+
+  if (input.probeApiKey) {
+    const probeHome = input.targetIsRemote
+      ? `/tmp/paperclip-codex-probe-${input.runId}`
+      : path.join(os.tmpdir(), `paperclip-codex-probe-${input.runId}`);
+    return {
+      command: "sh",
+      // Trap on EXIT removes the probe home (with the API-key auth.json) on
+      // any exit path; the command is run without `exec` so the wrapper shell
+      // stays alive long enough for the trap to fire after the child returns.
+      args: [
+        "-c",
+        'set -e; mkdir -p "$CODEX_HOME"; umask 077; printf "%s" "$_PAPERCLIP_CODEX_AUTH_JSON" > "$CODEX_HOME/auth.json"; unset _PAPERCLIP_CODEX_AUTH_JSON; trap \'rm -rf "$CODEX_HOME"\' EXIT INT TERM; "$0" "$@"',
+        input.command,
+        ...input.args,
+      ],
+      env: {
+        ...input.env,
+        CODEX_HOME: probeHome,
+        _PAPERCLIP_CODEX_AUTH_JSON: JSON.stringify({ OPENAI_API_KEY: input.probeApiKey }),
+      },
+      cleanup,
+    };
+  }
+
+  return {
+    command: input.command,
+    args: input.args,
+    env: { ...input.env },
+    cleanup,
+  };
+}
+
 export async function testEnvironment(
  ctx: AdapterEnvironmentTestContext,
 ): Promise<AdapterEnvironmentTestResult> {
@ -196,86 +292,80 @@ export async function testEnvironment(
        : isNonEmpty(hostOpenAiKey)
          ? hostOpenAiKey
          : null;
-      let probeCommand = command;
-      let probeArgs = args;
-      const probeEnv: Record<string, string> = { ...env };
-      if (probeApiKey) {
-        const probeHome = targetIsRemote
-          ? `/tmp/paperclip-codex-probe-${runId}`
-          : path.join(os.tmpdir(), `paperclip-codex-probe-${runId}`);
-        probeEnv.CODEX_HOME = probeHome;
-        probeEnv._PAPERCLIP_CODEX_AUTH_JSON = JSON.stringify({ OPENAI_API_KEY: probeApiKey });
-        probeCommand = "sh";
-        // Trap on EXIT removes the probe home (with the API-key auth.json) on
-        // any exit path; we drop `exec` so the wrapper shell stays alive long
-        // enough for the trap to fire after the child returns.
-        probeArgs = [
-          "-c",
-          'set -e; mkdir -p "$CODEX_HOME"; umask 077; printf "%s" "$_PAPERCLIP_CODEX_AUTH_JSON" > "$CODEX_HOME/auth.json"; unset _PAPERCLIP_CODEX_AUTH_JSON; trap \'rm -rf "$CODEX_HOME"\' EXIT INT TERM; "$0" "$@"',
-          command,
-          ...args,
-        ];
-      }
-
-      const probe = await runAdapterExecutionTargetProcess(
+      const preparedProbe = await prepareCodexHelloProbe({
        runId,
+        companyId: ctx.companyId,
        target,
-        probeCommand,
-        probeArgs,
-        {
-          cwd,
-          env: probeEnv,
-          timeoutSec: 45,
-          graceSec: 5,
-          stdin: "Respond with hello.",
-          onLog: async () => {},
-        },
-      );
-      const parsed = parseCodexJsonl(probe.stdout);
-      const detail = summarizeProbeDetail(probe.stdout, probe.stderr, parsed.errorMessage);
-      const authEvidence = `${parsed.errorMessage ?? ""}\n${probe.stdout}\n${probe.stderr}`.trim();
+        targetIsRemote,
+        cwd,
+        command,
+        args,
+        env,
+        probeApiKey,
+      });
+      try {
+        const probe = await runAdapterExecutionTargetProcess(
+          runId,
+          target,
+          preparedProbe.command,
+          preparedProbe.args,
+          {
+            cwd,
+            env: preparedProbe.env,
+            timeoutSec: 45,
+            graceSec: 5,
+            stdin: "Respond with hello.",
+            onLog: async () => {},
+          },
+        );
+        const parsed = parseCodexJsonl(probe.stdout);
+        const detail = summarizeProbeDetail(probe.stdout, probe.stderr, parsed.errorMessage);
+        const authEvidence = `${parsed.errorMessage ?? ""}\n${probe.stdout}\n${probe.stderr}`.trim();

-      if (probe.timedOut) {
-        checks.push({
-          code: "codex_hello_probe_timed_out",
-          level: "warn",
-          message: "Codex hello probe timed out.",
-          hint: "Retry the probe. If this persists, verify Codex can run `Respond with hello` from this directory manually.",
-        });
-      } else if ((probe.exitCode ?? 1) === 0) {
-        const summary = parsed.summary.trim();
-        const hasHello = /\bhello\b/i.test(summary);
-        checks.push({
-          code: hasHello ? "codex_hello_probe_passed" : "codex_hello_probe_unexpected_output",
-          level: hasHello ? "info" : "warn",
-          message: hasHello
-            ? "Codex hello probe succeeded."
-            : "Codex probe ran but did not return `hello` as expected.",
-          ...(summary ? { detail: summary.replace(/\s+/g, " ").trim().slice(0, 240) } : {}),
-          ...(hasHello
-            ? {}
-            : {
-                hint: "Try the probe manually (`codex exec --json -` then prompt: Respond with hello) to inspect full output.",
-              }),
-        });
-      } else if (CODEX_AUTH_REQUIRED_RE.test(authEvidence)) {
-        checks.push({
-          code: "codex_hello_probe_auth_required",
-          level: "warn",
-          message: "Codex CLI is installed, but authentication is not ready.",
-          ...(detail ? { detail } : {}),
-          hint: probeApiKey
-            ? "OPENAI_API_KEY was provided but Codex still rejected the request. Verify the key is valid for the OpenAI Responses API (e.g. `curl -H \"Authorization: Bearer $OPENAI_API_KEY\" https://api.openai.com/v1/models`), or run `codex login` and seed `~/.codex/auth.json`."
-            : "Codex CLI does not read OPENAI_API_KEY from the environment; set OPENAI_API_KEY in this adapter's config (so Paperclip writes it to `$CODEX_HOME/auth.json`) or run `codex login` on the host first.",
-        });
-      } else {
-        checks.push({
-          code: "codex_hello_probe_failed",
-          level: "error",
-          message: "Codex hello probe failed.",
-          ...(detail ? { detail } : {}),
-          hint: "Run `codex exec --json -` manually in this working directory and prompt `Respond with hello` to debug.",
-        });
+        if (probe.timedOut) {
+          checks.push({
+            code: "codex_hello_probe_timed_out",
+            level: "warn",
+            message: "Codex hello probe timed out.",
+            hint: "Retry the probe. If this persists, verify Codex can run `Respond with hello` from this directory manually.",
+          });
+        } else if ((probe.exitCode ?? 1) === 0) {
+          const summary = parsed.summary.trim();
+          const hasHello = /\bhello\b/i.test(summary);
+          checks.push({
+            code: hasHello ? "codex_hello_probe_passed" : "codex_hello_probe_unexpected_output",
+            level: hasHello ? "info" : "warn",
+            message: hasHello
+              ? "Codex hello probe succeeded."
+              : "Codex probe ran but did not return `hello` as expected.",
+            ...(summary ? { detail: summary.replace(/\s+/g, " ").trim().slice(0, 240) } : {}),
+            ...(hasHello
+              ? {}
+              : {
+                  hint: "Try the probe manually (`codex exec --json -` then prompt: Respond with hello) to inspect full output.",
+                }),
+          });
+        } else if (CODEX_AUTH_REQUIRED_RE.test(authEvidence)) {
+          checks.push({
+            code: "codex_hello_probe_auth_required",
+            level: "warn",
+            message: "Codex CLI is installed, but authentication is not ready.",
+            ...(detail ? { detail } : {}),
+            hint: probeApiKey
+              ? "OPENAI_API_KEY was provided but Codex still rejected the request. Verify the key is valid for the OpenAI Responses API (e.g. `curl -H \"Authorization: Bearer $OPENAI_API_KEY\" https://api.openai.com/v1/models`), or run `codex login` and seed `~/.codex/auth.json`."
+              : "Codex CLI does not read OPENAI_API_KEY from the environment; set OPENAI_API_KEY in this adapter's config (so Paperclip writes it to `$CODEX_HOME/auth.json`) or run `codex login` on the host first.",
+          });
+        } else {
+          checks.push({
+            code: "codex_hello_probe_failed",
+            level: "error",
+            message: "Codex hello probe failed.",
+            ...(detail ? { detail } : {}),
+            hint: "Run `codex exec --json -` manually in this working directory and prompt `Respond with hello` to debug.",
+          });
+        }
+      } finally {
+        await preparedProbe.cleanup();
      }
    }
  }