paperclip/packages/adapters/codex-local/src/server/test.ts

380 lines
14 KiB
TypeScript
Raw Normal View History

import type {
AdapterEnvironmentCheck,
AdapterEnvironmentTestContext,
AdapterEnvironmentTestResult,
} from "@paperclipai/adapter-utils";
import {
asString,
parseObject,
ensurePathInEnv,
} from "@paperclipai/adapter-utils/server-utils";
Add dedicated environment settings page and test-in-environment (#4798) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Agents run inside environments (local, SSH, E2B sandbox) > - Operators need to configure and manage these environments > - But environment settings were buried inside the general company settings page, making them hard to find > - Additionally, when testing an agent from the configuration form, the test always ran locally regardless of which environment was selected > - This PR moves environments into a dedicated top-level company settings section and wires the "Test Environment" button to run inside the selected environment > - The benefit is operators can find and manage environments more easily, and the test button now validates the actual environment the agent will use ## What Changed - Added a dedicated `CompanyEnvironments` settings page with its own route and sidebar entry - Updated `CompanySettingsSidebar` and `CompanySettingsNav` to include the new environments section - Modified the agent test route (`POST /agents/:id/test`) to accept an optional `environmentId` parameter - Updated all adapter `test.ts` handlers to resolve and use the specified execution target environment - Added `resolveTestExecutionTarget` to `execution-target.ts` for remote environment test resolution with cwd fallback - Moved the "Test Environment" button and its feedback display into the `NewAgent` page footer for better UX flow ## Verification - `pnpm test` — all existing and new tests pass - `pnpm typecheck` — clean - Manual: navigate to Company Settings, confirm "Environments" appears as a top-level section - Manual: configure an agent with a non-local environment, click "Test Environment", confirm the test runs inside that environment ## Risks - Low risk. UI-only routing change for the settings page. 
The test-in-environment change adds an optional parameter with a local fallback, so existing behavior is preserved when no environment is specified. ## Model Used Codex GPT 5.4 high via Paperclip. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-04-29 15:56:13 -07:00
import {
ensureAdapterExecutionTargetCommandResolvable,
ensureAdapterExecutionTargetDirectory,
Wire per-adapter sandbox install commands through test and execute paths (#5280) > **Stacked PR.** Sits on top of the e2b sandbox chain — #5278 (stdin staging) and #5279 (honest-resolvability + login-profiles). The cumulative diff against `master` includes both of those PRs' content; the files touched by *this* PR's commit are the new `maybeRunSandboxInstallCommand` helper in `packages/adapter-utils/src/execution-target.ts` and the per-adapter `index.ts`/`server/test.ts`/`server/execute.ts` wiring under `packages/adapters/{claude,codex,cursor,gemini,opencode,pi}-local/`. The honest resolvability check from #5279 is what gives this PR's install command a meaningful "did it actually land on PATH" follow-up. ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Sandbox execution targets are ephemeral — each fresh lease starts from a template image that may or may not have the agent CLIs preinstalled > - When a CLI isn't preinstalled, the resolvability probe fails at `command -v` and the hello probe never runs > - There's no shared mechanism for "before you probe or provision, install the CLI on this sandbox" > - This pull request adds a `SANDBOX_INSTALL_COMMAND` constant per adapter and a `maybeRunSandboxInstallCommand` helper that runs it via the existing sandbox login shell, captures structured output, and never throws (so the resolvability + hello probe still run after); each adapter's `test()` and `execute()` share the constant so the two callsites can't drift > - The benefit is a fresh sandbox lease without a preinstalled CLI now installs it once via `sh -lc` before the resolvability probe and before managed-runtime provisioning, with a uniform `<adapter>_install_command_run` check on the test report ## What Changed - `packages/adapter-utils/src/execution-target.ts`: add `AdapterSandboxInstallCommandCheck` and `maybeRunSandboxInstallCommand` (runs the install via existing sandbox shell, captures exit/stdout/stderr, returns a 
structured info/warn check, never throws) - Add `SANDBOX_INSTALL_COMMAND` to each adapter's `index.ts` so `test()` and `execute()` share a single source of truth - Wire each of the 6 affected adapter `testEnvironment()`s to call `maybeRunSandboxInstallCommand` before `ensureAdapterExecutionTargetCommandResolvable` - Pass `installCommand: SANDBOX_INSTALL_COMMAND` through `prepareAdapterExecutionTargetRuntime` in each adapter's `execute()` - Per-adapter install commands use npm globals where possible so binaries land on a PATH segment the template already exports: - claude → `npm install -g @anthropic-ai/claude-code` - codex → `npm install -g @openai/codex` - cursor → `curl https://cursor.com/install -fsS | bash` - gemini → `npm install -g @google/gemini-cli` - opencode → `npm install -g opencode-ai` - pi → `npm install -g @mariozechner/pi-coding-agent` SSH and local targets ignore `installCommand` (SSH runtime takes no such param; local short-circuits before runtime prep), so this is a no-op for non-sandbox environments. ## Verification - `pnpm typecheck` clean - `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils` and per-adapter projects pass - Manual sandbox matrix (claude, codex, cursor, gemini, opencode, pi) — each goes `install_command_run → resolvable → hello_probe_passed` (Codex and Pi land on `hello_probe_auth_required`, which is the configured-credentials problem, not an install issue) - SSH no-regression: SSH Claude still passes; the helper short-circuits on non-sandbox targets ## Risks Medium — adds a network/CPU cost (npm install / curl) on every fresh sandbox lease. Cost is bounded (one-time per lease, typically tens of seconds for npm globals), and the helper never throws so a failing install still lets the report run resolvability and hello probes. If a sandbox image already has the CLI, the install is an idempotent reinstall. 
## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-05-05 08:29:28 -07:00
maybeRunSandboxInstallCommand,
Add dedicated environment settings page and test-in-environment (#4798) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Agents run inside environments (local, SSH, E2B sandbox) > - Operators need to configure and manage these environments > - But environment settings were buried inside the general company settings page, making them hard to find > - Additionally, when testing an agent from the configuration form, the test always ran locally regardless of which environment was selected > - This PR moves environments into a dedicated top-level company settings section and wires the "Test Environment" button to run inside the selected environment > - The benefit is operators can find and manage environments more easily, and the test button now validates the actual environment the agent will use ## What Changed - Added a dedicated `CompanyEnvironments` settings page with its own route and sidebar entry - Updated `CompanySettingsSidebar` and `CompanySettingsNav` to include the new environments section - Modified the agent test route (`POST /agents/:id/test`) to accept an optional `environmentId` parameter - Updated all adapter `test.ts` handlers to resolve and use the specified execution target environment - Added `resolveTestExecutionTarget` to `execution-target.ts` for remote environment test resolution with cwd fallback - Moved the "Test Environment" button and its feedback display into the `NewAgent` page footer for better UX flow ## Verification - `pnpm test` — all existing and new tests pass - `pnpm typecheck` — clean - Manual: navigate to Company Settings, confirm "Environments" appears as a top-level section - Manual: configure an agent with a non-local environment, click "Test Environment", confirm the test runs inside that environment ## Risks - Low risk. UI-only routing change for the settings page. 
The test-in-environment change adds an optional parameter with a local fallback, so existing behavior is preserved when no environment is specified. ## Model Used Codex GPT 5.4 high via Paperclip. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-04-29 15:56:13 -07:00
runAdapterExecutionTargetProcess,
describeAdapterExecutionTarget,
resolveAdapterExecutionTargetCwd,
Stabilize runtime probes and Codex env tests (#5445) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Adapters expose a Test action that probes the configured runtime — install, resolvability, hello — to give operators a fast yes/no on whether an environment is healthy > - The Codex test path was running its hello probe directly without going through the managed-runtime preparation that production runs use, so a healthy production setup could still report a probe failure > - The plugin worker manager wasn't surfacing terminated workers cleanly, leaving the runtime probe waiting on a dead worker until the request timed out > - This pull request routes the Codex test probe through `prepareAdapterExecutionTargetRuntime` (so it sees the same managed Codex home production sees), exposes `commandCwd` on `createCommandManagedRuntimeClient` so callers can target a per-probe directory without leaking the workspace `remoteCwd`, and propagates plugin-worker termination as a usable error instead of a hang > - The benefit is the Codex Test action mirrors production behavior end-to-end, and probes against a terminated plugin worker fail fast instead of timing out ## What Changed - `packages/adapter-utils/src/command-managed-runtime.ts`: rename the `remoteCwd` knob to `commandCwd` so callers can target a per-probe directory without inheriting the workspace cwd; matching test coverage in `command-managed-runtime.test.ts` - `packages/adapter-utils/src/sandbox-callback-bridge.{ts,test.ts}`: small fixes to keep callback bridge stop semantics deterministic - `packages/adapters/codex-local/src/server/test.ts`: thread the Codex hello probe through `prepareAdapterExecutionTargetRuntime` + `prepareManagedCodexHome` so the probe sees the same managed home production sees; new `test.remote.test.ts` covers the remote probe path - `packages/adapters/cursor-local/src/server/execute.ts`: small probe-side cleanup that aligns with the new commandCwd contract - 
`server/src/services/plugin-worker-manager.ts`: surface plugin-worker termination as a structured error so callers fail fast; new `plugin-worker-terminated.cjs` fixture and `plugin-worker-manager.test.ts` cases pin the behavior ## Verification - `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils --project @paperclipai/adapter-codex-local --project @paperclipai/adapter-cursor-local --project @paperclipai/server` — 1749/1750 passing (1 unrelated skip) - `pnpm typecheck` clean ## Risks Low–medium. The `remoteCwd → commandCwd` rename is a parameter renaming on an internal helper used only by adapter test/execute paths in this repo. The plugin-worker-terminated path was previously a hang; failing fast may surface latent timeouts as explicit termination errors in callers that already expected them. ## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable — new tests cover commandCwd, plugin-worker termination, and Codex remote test path - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --- > **Stacked PR.** Sits on top of #5444 which adds the per-run runtime API surface this PR builds on. Cumulative diff against `master` includes that PR's content; the files touched by *this* PR's commit are listed under "What Changed" above. Will rebase onto `master` and force-push once #5444 merges.
2026-05-07 14:52:31 -07:00
prepareAdapterExecutionTargetRuntime,
Add dedicated environment settings page and test-in-environment (#4798) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Agents run inside environments (local, SSH, E2B sandbox) > - Operators need to configure and manage these environments > - But environment settings were buried inside the general company settings page, making them hard to find > - Additionally, when testing an agent from the configuration form, the test always ran locally regardless of which environment was selected > - This PR moves environments into a dedicated top-level company settings section and wires the "Test Environment" button to run inside the selected environment > - The benefit is operators can find and manage environments more easily, and the test button now validates the actual environment the agent will use ## What Changed - Added a dedicated `CompanyEnvironments` settings page with its own route and sidebar entry - Updated `CompanySettingsSidebar` and `CompanySettingsNav` to include the new environments section - Modified the agent test route (`POST /agents/:id/test`) to accept an optional `environmentId` parameter - Updated all adapter `test.ts` handlers to resolve and use the specified execution target environment - Added `resolveTestExecutionTarget` to `execution-target.ts` for remote environment test resolution with cwd fallback - Moved the "Test Environment" button and its feedback display into the `NewAgent` page footer for better UX flow ## Verification - `pnpm test` — all existing and new tests pass - `pnpm typecheck` — clean - Manual: navigate to Company Settings, confirm "Environments" appears as a top-level section - Manual: configure an agent with a non-local environment, click "Test Environment", confirm the test runs inside that environment ## Risks - Low risk. UI-only routing change for the settings page. 
The test-in-environment change adds an optional parameter with a local fallback, so existing behavior is preserved when no environment is specified. ## Model Used Codex GPT 5.4 high via Paperclip. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-04-29 15:56:13 -07:00
} from "@paperclipai/adapter-utils/execution-target";
Stabilize runtime probes and Codex env tests (#5445) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Adapters expose a Test action that probes the configured runtime — install, resolvability, hello — to give operators a fast yes/no on whether an environment is healthy > - The Codex test path was running its hello probe directly without going through the managed-runtime preparation that production runs use, so a healthy production setup could still report a probe failure > - The plugin worker manager wasn't surfacing terminated workers cleanly, leaving the runtime probe waiting on a dead worker until the request timed out > - This pull request routes the Codex test probe through `prepareAdapterExecutionTargetRuntime` (so it sees the same managed Codex home production sees), exposes `commandCwd` on `createCommandManagedRuntimeClient` so callers can target a per-probe directory without leaking the workspace `remoteCwd`, and propagates plugin-worker termination as a usable error instead of a hang > - The benefit is the Codex Test action mirrors production behavior end-to-end, and probes against a terminated plugin worker fail fast instead of timing out ## What Changed - `packages/adapter-utils/src/command-managed-runtime.ts`: rename the `remoteCwd` knob to `commandCwd` so callers can target a per-probe directory without inheriting the workspace cwd; matching test coverage in `command-managed-runtime.test.ts` - `packages/adapter-utils/src/sandbox-callback-bridge.{ts,test.ts}`: small fixes to keep callback bridge stop semantics deterministic - `packages/adapters/codex-local/src/server/test.ts`: thread the Codex hello probe through `prepareAdapterExecutionTargetRuntime` + `prepareManagedCodexHome` so the probe sees the same managed home production sees; new `test.remote.test.ts` covers the remote probe path - `packages/adapters/cursor-local/src/server/execute.ts`: small probe-side cleanup that aligns with the new commandCwd contract - 
`server/src/services/plugin-worker-manager.ts`: surface plugin-worker termination as a structured error so callers fail fast; new `plugin-worker-terminated.cjs` fixture and `plugin-worker-manager.test.ts` cases pin the behavior ## Verification - `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils --project @paperclipai/adapter-codex-local --project @paperclipai/adapter-cursor-local --project @paperclipai/server` — 1749/1750 passing (1 unrelated skip) - `pnpm typecheck` clean ## Risks Low–medium. The `remoteCwd → commandCwd` rename is a parameter renaming on an internal helper used only by adapter test/execute paths in this repo. The plugin-worker-terminated path was previously a hang; failing fast may surface latent timeouts as explicit termination errors in callers that already expected them. ## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable — new tests cover commandCwd, plugin-worker termination, and Codex remote test path - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --- > **Stacked PR.** Sits on top of #5444 which adds the per-run runtime API surface this PR builds on. Cumulative diff against `master` includes that PR's content; the files touched by *this* PR's commit are listed under "What Changed" above. Will rebase onto `master` and force-push once #5444 merges.
2026-05-07 14:52:31 -07:00
import fs from "node:fs/promises";
import path from "node:path";
Write apikey-mode auth.json so Codex CLI 0.122+ can authenticate via OPENAI_API_KEY (#5276) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - The Codex adapter spawns the OpenAI Codex CLI to drive the model > - Codex CLI 0.122 changed how it reads credentials: it ignores `OPENAI_API_KEY` from the environment and reads only `$CODEX_HOME/auth.json` > - Without auth.json, Codex 0.122+ returns 401 "Missing bearer or basic authentication" on `/v1/responses` even when `OPENAI_API_KEY` is forwarded into the sandbox or remote shell > - This pull request materializes an apikey-mode `auth.json` in the managed Codex home (or per-run for the test probe) when an `OPENAI_API_KEY` is configured > - The benefit is configured Codex API keys authenticate correctly with current Codex CLI versions across local, SSH, and sandbox targets ## What Changed - `codex-home.ts`: add `writeApiKeyAuthJson()` and let `prepareManagedCodexHome` accept an `apiKey` override that replaces the symlinked host auth.json with an apikey-mode file - `execute.ts`: pass `envConfig.OPENAI_API_KEY` into `prepareManagedCodexHome` so the managed (and synced-to-remote) Codex home authenticates via the configured key - `test.ts`: when `OPENAI_API_KEY` is available, wrap the hello probe with a small shell that materializes a per-run `$CODEX_HOME/auth.json` before exec'ing codex; key content rides through env to avoid leaking into process listings - Update the `codex_hello_probe_auth_required` hint to explain Codex CLI does not read `OPENAI_API_KEY` from env ## Verification - `pnpm vitest run --no-coverage --project @paperclipai/adapter-codex-local` - `pnpm typecheck` clean - Manual: Codex 0.122.0 with empty `CODEX_HOME` returns 401 with env-only auth; with this change it authenticates cleanly ## Risks Low risk — when no API key is configured, behavior is unchanged (no auth.json written, existing chatgpt-mode flow preserved). Apikey-mode `auth.json` is the upstream-supported format. 
## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-05-05 08:00:27 -07:00
import os from "node:os";
import { parseCodexJsonl } from "./parse.js";
Wire per-adapter sandbox install commands through test and execute paths (#5280) > **Stacked PR.** Sits on top of the e2b sandbox chain — #5278 (stdin staging) and #5279 (honest-resolvability + login-profiles). The cumulative diff against `master` includes both of those PRs' content; the files touched by *this* PR's commit are the new `maybeRunSandboxInstallCommand` helper in `packages/adapter-utils/src/execution-target.ts` and the per-adapter `index.ts`/`server/test.ts`/`server/execute.ts` wiring under `packages/adapters/{claude,codex,cursor,gemini,opencode,pi}-local/`. The honest resolvability check from #5279 is what gives this PR's install command a meaningful "did it actually land on PATH" follow-up. ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Sandbox execution targets are ephemeral — each fresh lease starts from a template image that may or may not have the agent CLIs preinstalled > - When a CLI isn't preinstalled, the resolvability probe fails at `command -v` and the hello probe never runs > - There's no shared mechanism for "before you probe or provision, install the CLI on this sandbox" > - This pull request adds a `SANDBOX_INSTALL_COMMAND` constant per adapter and a `maybeRunSandboxInstallCommand` helper that runs it via the existing sandbox login shell, captures structured output, and never throws (so the resolvability + hello probe still run after); each adapter's `test()` and `execute()` share the constant so the two callsites can't drift > - The benefit is a fresh sandbox lease without a preinstalled CLI now installs it once via `sh -lc` before the resolvability probe and before managed-runtime provisioning, with a uniform `<adapter>_install_command_run` check on the test report ## What Changed - `packages/adapter-utils/src/execution-target.ts`: add `AdapterSandboxInstallCommandCheck` and `maybeRunSandboxInstallCommand` (runs the install via existing sandbox shell, captures exit/stdout/stderr, returns a 
structured info/warn check, never throws) - Add `SANDBOX_INSTALL_COMMAND` to each adapter's `index.ts` so `test()` and `execute()` share a single source of truth - Wire each of the 6 affected adapter `testEnvironment()`s to call `maybeRunSandboxInstallCommand` before `ensureAdapterExecutionTargetCommandResolvable` - Pass `installCommand: SANDBOX_INSTALL_COMMAND` through `prepareAdapterExecutionTargetRuntime` in each adapter's `execute()` - Per-adapter install commands use npm globals where possible so binaries land on a PATH segment the template already exports: - claude → `npm install -g @anthropic-ai/claude-code` - codex → `npm install -g @openai/codex` - cursor → `curl https://cursor.com/install -fsS | bash` - gemini → `npm install -g @google/gemini-cli` - opencode → `npm install -g opencode-ai` - pi → `npm install -g @mariozechner/pi-coding-agent` SSH and local targets ignore `installCommand` (SSH runtime takes no such param; local short-circuits before runtime prep), so this is a no-op for non-sandbox environments. ## Verification - `pnpm typecheck` clean - `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils` and per-adapter projects pass - Manual sandbox matrix (claude, codex, cursor, gemini, opencode, pi) — each goes `install_command_run → resolvable → hello_probe_passed` (Codex and Pi land on `hello_probe_auth_required`, which is the configured-credentials problem, not an install issue) - SSH no-regression: SSH Claude still passes; the helper short-circuits on non-sandbox targets ## Risks Medium — adds a network/CPU cost (npm install / curl) on every fresh sandbox lease. Cost is bounded (one-time per lease, typically tens of seconds for npm globals), and the helper never throws so a failing install still lets the report run resolvability and hello probes. If a sandbox image already has the CLI, the install is an idempotent reinstall. 
## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-05-05 08:29:28 -07:00
import { SANDBOX_INSTALL_COMMAND } from "../index.js";
import { codexHomeDir, readCodexAuthInfo } from "./quota.js";
import { buildCodexExecArgs } from "./codex-args.js";
Stabilize runtime probes and Codex env tests (#5445) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Adapters expose a Test action that probes the configured runtime — install, resolvability, hello — to give operators a fast yes/no on whether an environment is healthy > - The Codex test path was running its hello probe directly without going through the managed-runtime preparation that production runs use, so a healthy production setup could still report a probe failure > - The plugin worker manager wasn't surfacing terminated workers cleanly, leaving the runtime probe waiting on a dead worker until the request timed out > - This pull request routes the Codex test probe through `prepareAdapterExecutionTargetRuntime` (so it sees the same managed Codex home production sees), exposes `commandCwd` on `createCommandManagedRuntimeClient` so callers can target a per-probe directory without leaking the workspace `remoteCwd`, and propagates plugin-worker termination as a usable error instead of a hang > - The benefit is the Codex Test action mirrors production behavior end-to-end, and probes against a terminated plugin worker fail fast instead of timing out ## What Changed - `packages/adapter-utils/src/command-managed-runtime.ts`: rename the `remoteCwd` knob to `commandCwd` so callers can target a per-probe directory without inheriting the workspace cwd; matching test coverage in `command-managed-runtime.test.ts` - `packages/adapter-utils/src/sandbox-callback-bridge.{ts,test.ts}`: small fixes to keep callback bridge stop semantics deterministic - `packages/adapters/codex-local/src/server/test.ts`: thread the Codex hello probe through `prepareAdapterExecutionTargetRuntime` + `prepareManagedCodexHome` so the probe sees the same managed home production sees; new `test.remote.test.ts` covers the remote probe path - `packages/adapters/cursor-local/src/server/execute.ts`: small probe-side cleanup that aligns with the new commandCwd contract - 
`server/src/services/plugin-worker-manager.ts`: surface plugin-worker termination as a structured error so callers fail fast; new `plugin-worker-terminated.cjs` fixture and `plugin-worker-manager.test.ts` cases pin the behavior ## Verification - `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils --project @paperclipai/adapter-codex-local --project @paperclipai/adapter-cursor-local --project @paperclipai/server` — 1749/1750 passing (1 unrelated skip) - `pnpm typecheck` clean ## Risks Low–medium. The `remoteCwd → commandCwd` rename is a parameter renaming on an internal helper used only by adapter test/execute paths in this repo. The plugin-worker-terminated path was previously a hang; failing fast may surface latent timeouts as explicit termination errors in callers that already expected them. ## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable — new tests cover commandCwd, plugin-worker termination, and Codex remote test path - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --- > **Stacked PR.** Sits on top of #5444 which adds the per-run runtime API surface this PR builds on. Cumulative diff against `master` includes that PR's content; the files touched by *this* PR's commit are listed under "What Changed" above. Will rebase onto `master` and force-push once #5444 merges.
2026-05-07 14:52:31 -07:00
import { prepareManagedCodexHome } from "./codex-home.js";
/**
 * Collapse a list of environment checks into one overall status.
 *
 * Severity wins in this order: any "error" check yields "fail"; otherwise any
 * "warn" check yields "warn"; otherwise (including an empty list) "pass".
 */
function summarizeStatus(checks: AdapterEnvironmentCheck[]): AdapterEnvironmentTestResult["status"] {
  let sawWarning = false;
  for (const { level } of checks) {
    // An error dominates everything else, so stop scanning immediately.
    if (level === "error") return "fail";
    if (level === "warn") sawWarning = true;
  }
  return sawWarning ? "warn" : "pass";
}
/** Type guard: true only for strings containing at least one non-whitespace character. */
function isNonEmpty(value: unknown): value is string {
  if (typeof value !== "string") return false;
  return value.trim() !== "";
}
/**
 * Return the first line of `text` that is non-empty after trimming, already
 * trimmed. Lines are split on LF or CRLF. Returns "" when every line is blank.
 */
function firstNonEmptyLine(text: string): string {
  for (const rawLine of text.split(/\r?\n/)) {
    const trimmed = rawLine.trim();
    if (trimmed) return trimmed;
  }
  return "";
}
/**
 * Check whether `command` names the `expected` executable.
 *
 * Only the final path segment is compared, lower-cased, against `expected`
 * itself and its `.cmd` / `.exe` variants. `expected` is used as given (it is
 * not lower-cased here).
 */
function commandLooksLike(command: string, expected: string): boolean {
  const executableName = path.basename(command).toLowerCase();
  const acceptedNames = [expected, `${expected}.cmd`, `${expected}.exe`];
  return acceptedNames.includes(executableName);
}
/**
 * Build a short, single-line detail string for a probe result.
 *
 * The source text is chosen in priority order: the trimmed `parsedError`, then
 * the first non-empty line of `stderr`, then the first non-empty line of
 * `stdout`. Whitespace runs are collapsed to single spaces.
 *
 * @returns The summarized text (at most 240 characters), or `null` when none
 *   of the inputs contain any text.
 */
function summarizeProbeDetail(stdout: string, stderr: string, parsedError: string | null): string | null {
  const raw = parsedError?.trim() || firstNonEmptyLine(stderr) || firstNonEmptyLine(stdout);
  if (!raw) return null;
  const clean = raw.replace(/\s+/g, " ").trim();
  const max = 240;
  if (clean.length <= max) return clean;
  // Keep `max - 1` characters and spend the reserved slot on an ellipsis so
  // readers can tell the detail was truncated.
  return `${clean.slice(0, max - 1)}…`;
}
// Case-insensitive pattern for phrases that signal a missing or rejected
// credential: "not logged in", "login required", "authentication required",
// "unauthorized", "invalid [or missing] api[ key]", "openai api key" (with
// `_`, `-`, whitespace, or nothing between the words), "api key ... required",
// and "please run `codex login`" (backticks optional).
// NOTE(review): presumably applied to Codex probe output to classify auth
// failures — the call site is outside this block; confirm there.
const CODEX_AUTH_REQUIRED_RE =
/(?:not\s+logged\s+in|login\s+required|authentication\s+required|unauthorized|invalid(?:\s+or\s+missing)?\s+api(?:[_\s-]?key)?|openai[_\s-]?api[_\s-]?key|api[_\s-]?key.*required|please\s+run\s+`?codex\s+login`?)/i;
Stabilize runtime probes and Codex env tests (#5445) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Adapters expose a Test action that probes the configured runtime — install, resolvability, hello — to give operators a fast yes/no on whether an environment is healthy > - The Codex test path was running its hello probe directly without going through the managed-runtime preparation that production runs use, so a healthy production setup could still report a probe failure > - The plugin worker manager wasn't surfacing terminated workers cleanly, leaving the runtime probe waiting on a dead worker until the request timed out > - This pull request routes the Codex test probe through `prepareAdapterExecutionTargetRuntime` (so it sees the same managed Codex home production sees), exposes `commandCwd` on `createCommandManagedRuntimeClient` so callers can target a per-probe directory without leaking the workspace `remoteCwd`, and propagates plugin-worker termination as a usable error instead of a hang > - The benefit is the Codex Test action mirrors production behavior end-to-end, and probes against a terminated plugin worker fail fast instead of timing out ## What Changed - `packages/adapter-utils/src/command-managed-runtime.ts`: rename the `remoteCwd` knob to `commandCwd` so callers can target a per-probe directory without inheriting the workspace cwd; matching test coverage in `command-managed-runtime.test.ts` - `packages/adapter-utils/src/sandbox-callback-bridge.{ts,test.ts}`: small fixes to keep callback bridge stop semantics deterministic - `packages/adapters/codex-local/src/server/test.ts`: thread the Codex hello probe through `prepareAdapterExecutionTargetRuntime` + `prepareManagedCodexHome` so the probe sees the same managed home production sees; new `test.remote.test.ts` covers the remote probe path - `packages/adapters/cursor-local/src/server/execute.ts`: small probe-side cleanup that aligns with the new commandCwd contract - 
`server/src/services/plugin-worker-manager.ts`: surface plugin-worker termination as a structured error so callers fail fast; new `plugin-worker-terminated.cjs` fixture and `plugin-worker-manager.test.ts` cases pin the behavior ## Verification - `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils --project @paperclipai/adapter-codex-local --project @paperclipai/adapter-cursor-local --project @paperclipai/server` — 1749/1750 passing (1 unrelated skip) - `pnpm typecheck` clean ## Risks Low–medium. The `remoteCwd → commandCwd` rename is a parameter renaming on an internal helper used only by adapter test/execute paths in this repo. The plugin-worker-terminated path was previously a hang; failing fast may surface latent timeouts as explicit termination errors in callers that already expected them. ## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable — new tests cover commandCwd, plugin-worker termination, and Codex remote test path - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --- > **Stacked PR.** Sits on top of #5444 which adds the per-run runtime API surface this PR builds on. Cumulative diff against `master` includes that PR's content; the files touched by *this* PR's commit are listed under "What Changed" above. Will rebase onto `master` and force-push once #5444 merges.
2026-05-07 14:52:31 -07:00
/**
 * Work out the command, arguments, and environment for the Codex hello probe.
 *
 * Three cases, checked in order:
 *  1. Remote target and no probe API key: build a managed Codex home with
 *     `prepareManagedCodexHome`, stage it through
 *     `prepareAdapterExecutionTargetRuntime` as the `home` asset, and point
 *     `CODEX_HOME` at the staged directory when one is reported.
 *  2. A probe API key is supplied: wrap the command in an `sh -c` script that
 *     writes an `auth.json` containing the key into a per-run `CODEX_HOME`
 *     and removes that directory via a shell `trap` on exit.
 *  3. Otherwise: run the command as given with a copy of `input.env`.
 *
 * @returns The command/args/env to run, plus a `cleanup` callback. `cleanup`
 *   restores the prepared workspace (errors ignored) and removes the local
 *   temp directory created for case 1; in the other cases it is a no-op.
 * @throws Rethrows any error from preparing the remote runtime (case 1). The
 *   local temp directory is removed before rethrowing, because the caller
 *   never receives `cleanup` on that path.
 */
async function prepareCodexHelloProbe(input: {
  runId: string;
  companyId: string;
  target: AdapterEnvironmentTestContext["executionTarget"] | null;
  targetIsRemote: boolean;
  cwd: string;
  command: string;
  args: string[];
  env: Record<string, string>;
  probeApiKey: string | null;
}): Promise<{
  command: string;
  args: string[];
  env: Record<string, string>;
  cleanup: () => Promise<void>;
}> {
  let preparedRuntime: Awaited<ReturnType<typeof prepareAdapterExecutionTargetRuntime>> | null = null;
  let preparedRuntimeWorkspaceLocalDir: string | null = null;

  // Best-effort teardown: failures are swallowed so cleanup never masks the
  // probe's own result.
  const cleanup = async () => {
    await preparedRuntime?.restoreWorkspace().catch(() => {});
    if (preparedRuntimeWorkspaceLocalDir) {
      await fs.rm(preparedRuntimeWorkspaceLocalDir, { recursive: true, force: true }).catch(() => {});
    }
  };

  if (input.targetIsRemote && !input.probeApiKey) {
    try {
      const managedHome = await prepareManagedCodexHome(process.env, async () => {}, input.companyId, {
        apiKey: null,
      });
      preparedRuntimeWorkspaceLocalDir = await fs.mkdtemp(
        path.join(os.tmpdir(), `paperclip-codex-envtest-${input.runId}-`),
      );
      const runtime = await prepareAdapterExecutionTargetRuntime({
        runId: input.runId,
        target: input.target,
        adapterKey: "codex",
        workspaceLocalDir: preparedRuntimeWorkspaceLocalDir,
        // Pass `input.cwd` as the base (not a pre-built per-run subdir).
        // `prepareRemoteManagedRuntime` itself appends
        // `.paperclip-runtime/runs/<runId>/workspace` to whatever it gets, so
        // pre-building a per-run path here would double-nest the run ID.
        workspaceRemoteDir: input.cwd,
        installCommand: SANDBOX_INSTALL_COMMAND,
        detectCommand: input.command,
        assets: [
          {
            key: "home",
            localDir: managedHome,
            followSymlinks: true,
          },
        ],
      });
      preparedRuntime = runtime;
      const stagedHome = runtime.assetDirs.home;
      return {
        command: input.command,
        args: input.args,
        env: stagedHome ? { ...input.env, CODEX_HOME: stagedHome } : { ...input.env },
        cleanup,
      };
    } catch (err) {
      // The caller never sees `cleanup` when preparation fails, so release
      // the temp directory here instead of leaking it.
      await cleanup();
      throw err;
    }
  }

  if (input.probeApiKey) {
    // Remote targets get a fixed POSIX path; local targets use the host's
    // temp directory.
    const probeHome = input.targetIsRemote
      ? `/tmp/paperclip-codex-probe-${input.runId}`
      : path.join(os.tmpdir(), `paperclip-codex-probe-${input.runId}`);
    return {
      command: "sh",
      args: [
        "-c",
        // `$0` is the real command and `$@` its arguments (passed below).
        'set -e; mkdir -p "$CODEX_HOME"; umask 077; printf "%s" "$_PAPERCLIP_CODEX_AUTH_JSON" > "$CODEX_HOME/auth.json"; unset _PAPERCLIP_CODEX_AUTH_JSON; trap \'rm -rf "$CODEX_HOME"\' EXIT INT TERM; "$0" "$@"',
        input.command,
        ...input.args,
      ],
      env: {
        ...input.env,
        CODEX_HOME: probeHome,
        _PAPERCLIP_CODEX_AUTH_JSON: JSON.stringify({ OPENAI_API_KEY: input.probeApiKey }),
      },
      cleanup,
    };
  }

  return {
    command: input.command,
    args: input.args,
    env: { ...input.env },
    cleanup,
  };
}
export async function testEnvironment(
ctx: AdapterEnvironmentTestContext,
): Promise<AdapterEnvironmentTestResult> {
const checks: AdapterEnvironmentCheck[] = [];
const config = parseObject(ctx.config);
const command = asString(config.command, "codex");
Add dedicated environment settings page and test-in-environment (#4798) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Agents run inside environments (local, SSH, E2B sandbox) > - Operators need to configure and manage these environments > - But environment settings were buried inside the general company settings page, making them hard to find > - Additionally, when testing an agent from the configuration form, the test always ran locally regardless of which environment was selected > - This PR moves environments into a dedicated top-level company settings section and wires the "Test Environment" button to run inside the selected environment > - The benefit is operators can find and manage environments more easily, and the test button now validates the actual environment the agent will use ## What Changed - Added a dedicated `CompanyEnvironments` settings page with its own route and sidebar entry - Updated `CompanySettingsSidebar` and `CompanySettingsNav` to include the new environments section - Modified the agent test route (`POST /agents/:id/test`) to accept an optional `environmentId` parameter - Updated all adapter `test.ts` handlers to resolve and use the specified execution target environment - Added `resolveTestExecutionTarget` to `execution-target.ts` for remote environment test resolution with cwd fallback - Moved the "Test Environment" button and its feedback display into the `NewAgent` page footer for better UX flow ## Verification - `pnpm test` — all existing and new tests pass - `pnpm typecheck` — clean - Manual: navigate to Company Settings, confirm "Environments" appears as a top-level section - Manual: configure an agent with a non-local environment, click "Test Environment", confirm the test runs inside that environment ## Risks - Low risk. UI-only routing change for the settings page. 
The test-in-environment change adds an optional parameter with a local fallback, so existing behavior is preserved when no environment is specified. ## Model Used Codex GPT 5.4 high via Paperclip. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-04-29 15:56:13 -07:00
const target = ctx.executionTarget ?? null;
const targetIsRemote = target?.kind === "remote";
const cwd = resolveAdapterExecutionTargetCwd(target, asString(config.cwd, ""), process.cwd());
const targetLabel = targetIsRemote
Add cursor sandbox support and fix SSH workspace sync (#4803) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Agents can run inside sandboxed environments like E2B, or on remote hosts via SSH > - The cursor adapter needs to resolve `cursor-agent` inside sandbox environments where it's installed in `~/.local/bin` > - But when using the default `agent` command on a sandbox target, the adapter didn't know to look in `~/.local/bin/cursor-agent`, causing "command not found" failures > - Additionally, repeated SSH runs failed because `git checkout` during workspace sync conflicted with leftover `.paperclip-runtime` files from previous runs > - This PR adds sandbox-aware command resolution for cursor and fixes the SSH workspace sync conflict > - The benefit is cursor works in E2B sandboxes out of the box, and repeated SSH runs don't fail on workspace sync ## What Changed - `cursor-local`: Added `prepareCursorSandboxCommand` — on sandbox targets, reads the remote `$HOME`, prepends `~/.local/bin` to PATH, and prefers `~/.local/bin/cursor-agent` when the default command is requested; tightened the sandbox command probe to validate the binary exists before launching; preserves explicit custom command overrides - `adapter-utils/ssh.ts`: Added `--force` to git checkout in SSH workspace sync to handle `.paperclip-runtime` untracked file conflicts from previous runs ## Verification - `pnpm test` — all existing and new tests pass, including cursor sandbox probe, sandbox execution, and custom command override tests - `pnpm typecheck` — clean - Manual: configure an E2B environment, run a cursor-local task, verify it resolves cursor-agent from the sandbox install path ## Risks - Low-medium. The `--force` flag on git checkout could discard uncommitted changes in the remote workspace, but the workspace is managed by Paperclip and should not contain user edits. ## Model Used Codex GPT 5.4 high via Paperclip. 
## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [ ] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-04-29 16:12:06 -07:00
? ctx.environmentName ?? describeAdapterExecutionTarget(target)
Add dedicated environment settings page and test-in-environment (#4798) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Agents run inside environments (local, SSH, E2B sandbox) > - Operators need to configure and manage these environments > - But environment settings were buried inside the general company settings page, making them hard to find > - Additionally, when testing an agent from the configuration form, the test always ran locally regardless of which environment was selected > - This PR moves environments into a dedicated top-level company settings section and wires the "Test Environment" button to run inside the selected environment > - The benefit is operators can find and manage environments more easily, and the test button now validates the actual environment the agent will use ## What Changed - Added a dedicated `CompanyEnvironments` settings page with its own route and sidebar entry - Updated `CompanySettingsSidebar` and `CompanySettingsNav` to include the new environments section - Modified the agent test route (`POST /agents/:id/test`) to accept an optional `environmentId` parameter - Updated all adapter `test.ts` handlers to resolve and use the specified execution target environment - Added `resolveTestExecutionTarget` to `execution-target.ts` for remote environment test resolution with cwd fallback - Moved the "Test Environment" button and its feedback display into the `NewAgent` page footer for better UX flow ## Verification - `pnpm test` — all existing and new tests pass - `pnpm typecheck` — clean - Manual: navigate to Company Settings, confirm "Environments" appears as a top-level section - Manual: configure an agent with a non-local environment, click "Test Environment", confirm the test runs inside that environment ## Risks - Low risk. UI-only routing change for the settings page. 
The test-in-environment change adds an optional parameter with a local fallback, so existing behavior is preserved when no environment is specified. ## Model Used Codex GPT 5.4 high via Paperclip. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-04-29 15:56:13 -07:00
: null;
const runId = `codex-envtest-${Date.now()}-${Math.random().toString(16).slice(2)}`;
if (targetLabel) {
checks.push({
code: "codex_environment_target",
level: "info",
message: `Probing inside environment: ${targetLabel}`,
});
}
try {
Add dedicated environment settings page and test-in-environment (#4798) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Agents run inside environments (local, SSH, E2B sandbox) > - Operators need to configure and manage these environments > - But environment settings were buried inside the general company settings page, making them hard to find > - Additionally, when testing an agent from the configuration form, the test always ran locally regardless of which environment was selected > - This PR moves environments into a dedicated top-level company settings section and wires the "Test Environment" button to run inside the selected environment > - The benefit is operators can find and manage environments more easily, and the test button now validates the actual environment the agent will use ## What Changed - Added a dedicated `CompanyEnvironments` settings page with its own route and sidebar entry - Updated `CompanySettingsSidebar` and `CompanySettingsNav` to include the new environments section - Modified the agent test route (`POST /agents/:id/test`) to accept an optional `environmentId` parameter - Updated all adapter `test.ts` handlers to resolve and use the specified execution target environment - Added `resolveTestExecutionTarget` to `execution-target.ts` for remote environment test resolution with cwd fallback - Moved the "Test Environment" button and its feedback display into the `NewAgent` page footer for better UX flow ## Verification - `pnpm test` — all existing and new tests pass - `pnpm typecheck` — clean - Manual: navigate to Company Settings, confirm "Environments" appears as a top-level section - Manual: configure an agent with a non-local environment, click "Test Environment", confirm the test runs inside that environment ## Risks - Low risk. UI-only routing change for the settings page. 
The test-in-environment change adds an optional parameter with a local fallback, so existing behavior is preserved when no environment is specified. ## Model Used Codex GPT 5.4 high via Paperclip. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-04-29 15:56:13 -07:00
await ensureAdapterExecutionTargetDirectory(runId, target, cwd, {
cwd,
env: {},
createIfMissing: true,
});
checks.push({
code: "codex_cwd_valid",
level: "info",
message: `Working directory is valid: ${cwd}`,
});
} catch (err) {
checks.push({
code: "codex_cwd_invalid",
level: "error",
message: err instanceof Error ? err.message : "Invalid working directory",
detail: cwd,
});
}
const envConfig = parseObject(config.env);
const env: Record<string, string> = {};
for (const [key, value] of Object.entries(envConfig)) {
if (typeof value === "string") env[key] = value;
}
const runtimeEnv = ensurePathInEnv({ ...process.env, ...env });
Wire per-adapter sandbox install commands through test and execute paths (#5280) > **Stacked PR.** Sits on top of the e2b sandbox chain — #5278 (stdin staging) and #5279 (honest-resolvability + login-profiles). The cumulative diff against `master` includes both of those PRs' content; the files touched by *this* PR's commit are the new `maybeRunSandboxInstallCommand` helper in `packages/adapter-utils/src/execution-target.ts` and the per-adapter `index.ts`/`server/test.ts`/`server/execute.ts` wiring under `packages/adapters/{claude,codex,cursor,gemini,opencode,pi}-local/`. The honest resolvability check from #5279 is what gives this PR's install command a meaningful "did it actually land on PATH" follow-up. ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Sandbox execution targets are ephemeral — each fresh lease starts from a template image that may or may not have the agent CLIs preinstalled > - When a CLI isn't preinstalled, the resolvability probe fails at `command -v` and the hello probe never runs > - There's no shared mechanism for "before you probe or provision, install the CLI on this sandbox" > - This pull request adds a `SANDBOX_INSTALL_COMMAND` constant per adapter and a `maybeRunSandboxInstallCommand` helper that runs it via the existing sandbox login shell, captures structured output, and never throws (so the resolvability + hello probe still run after); each adapter's `test()` and `execute()` share the constant so the two callsites can't drift > - The benefit is a fresh sandbox lease without a preinstalled CLI now installs it once via `sh -lc` before the resolvability probe and before managed-runtime provisioning, with a uniform `<adapter>_install_command_run` check on the test report ## What Changed - `packages/adapter-utils/src/execution-target.ts`: add `AdapterSandboxInstallCommandCheck` and `maybeRunSandboxInstallCommand` (runs the install via existing sandbox shell, captures exit/stdout/stderr, returns a 
structured info/warn check, never throws) - Add `SANDBOX_INSTALL_COMMAND` to each adapter's `index.ts` so `test()` and `execute()` share a single source of truth - Wire each of the 6 affected adapter `testEnvironment()`s to call `maybeRunSandboxInstallCommand` before `ensureAdapterExecutionTargetCommandResolvable` - Pass `installCommand: SANDBOX_INSTALL_COMMAND` through `prepareAdapterExecutionTargetRuntime` in each adapter's `execute()` - Per-adapter install commands use npm globals where possible so binaries land on a PATH segment the template already exports: - claude → `npm install -g @anthropic-ai/claude-code` - codex → `npm install -g @openai/codex` - cursor → `curl https://cursor.com/install -fsS | bash` - gemini → `npm install -g @google/gemini-cli` - opencode → `npm install -g opencode-ai` - pi → `npm install -g @mariozechner/pi-coding-agent` SSH and local targets ignore `installCommand` (SSH runtime takes no such param; local short-circuits before runtime prep), so this is a no-op for non-sandbox environments. ## Verification - `pnpm typecheck` clean - `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils` and per-adapter projects pass - Manual sandbox matrix (claude, codex, cursor, gemini, opencode, pi) — each goes `install_command_run → resolvable → hello_probe_passed` (Codex and Pi land on `hello_probe_auth_required`, which is the configured-credentials problem, not an install issue) - SSH no-regression: SSH Claude still passes; the helper short-circuits on non-sandbox targets ## Risks Medium — adds a network/CPU cost (npm install / curl) on every fresh sandbox lease. Cost is bounded (one-time per lease, typically tens of seconds for npm globals), and the helper never throws so a failing install still lets the report run resolvability and hello probes. If a sandbox image already has the CLI, the install is an idempotent reinstall. 
## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-05-05 08:29:28 -07:00
const installCheck = await maybeRunSandboxInstallCommand({
runId,
target,
adapterKey: "codex",
installCommand: SANDBOX_INSTALL_COMMAND,
detectCommand: command,
env,
});
if (installCheck) checks.push(installCheck);
try {
Add dedicated environment settings page and test-in-environment (#4798) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Agents run inside environments (local, SSH, E2B sandbox) > - Operators need to configure and manage these environments > - But environment settings were buried inside the general company settings page, making them hard to find > - Additionally, when testing an agent from the configuration form, the test always ran locally regardless of which environment was selected > - This PR moves environments into a dedicated top-level company settings section and wires the "Test Environment" button to run inside the selected environment > - The benefit is operators can find and manage environments more easily, and the test button now validates the actual environment the agent will use ## What Changed - Added a dedicated `CompanyEnvironments` settings page with its own route and sidebar entry - Updated `CompanySettingsSidebar` and `CompanySettingsNav` to include the new environments section - Modified the agent test route (`POST /agents/:id/test`) to accept an optional `environmentId` parameter - Updated all adapter `test.ts` handlers to resolve and use the specified execution target environment - Added `resolveTestExecutionTarget` to `execution-target.ts` for remote environment test resolution with cwd fallback - Moved the "Test Environment" button and its feedback display into the `NewAgent` page footer for better UX flow ## Verification - `pnpm test` — all existing and new tests pass - `pnpm typecheck` — clean - Manual: navigate to Company Settings, confirm "Environments" appears as a top-level section - Manual: configure an agent with a non-local environment, click "Test Environment", confirm the test runs inside that environment ## Risks - Low risk. UI-only routing change for the settings page. 
The test-in-environment change adds an optional parameter with a local fallback, so existing behavior is preserved when no environment is specified. ## Model Used Codex GPT 5.4 high via Paperclip. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-04-29 15:56:13 -07:00
await ensureAdapterExecutionTargetCommandResolvable(command, target, cwd, runtimeEnv);
checks.push({
code: "codex_command_resolvable",
level: "info",
message: `Command is executable: ${command}`,
});
} catch (err) {
checks.push({
code: "codex_command_unresolvable",
level: "error",
message: err instanceof Error ? err.message : "Command is not executable",
detail: command,
});
}
const configOpenAiKey = env.OPENAI_API_KEY;
Add dedicated environment settings page and test-in-environment (#4798) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Agents run inside environments (local, SSH, E2B sandbox) > - Operators need to configure and manage these environments > - But environment settings were buried inside the general company settings page, making them hard to find > - Additionally, when testing an agent from the configuration form, the test always ran locally regardless of which environment was selected > - This PR moves environments into a dedicated top-level company settings section and wires the "Test Environment" button to run inside the selected environment > - The benefit is operators can find and manage environments more easily, and the test button now validates the actual environment the agent will use ## What Changed - Added a dedicated `CompanyEnvironments` settings page with its own route and sidebar entry - Updated `CompanySettingsSidebar` and `CompanySettingsNav` to include the new environments section - Modified the agent test route (`POST /agents/:id/test`) to accept an optional `environmentId` parameter - Updated all adapter `test.ts` handlers to resolve and use the specified execution target environment - Added `resolveTestExecutionTarget` to `execution-target.ts` for remote environment test resolution with cwd fallback - Moved the "Test Environment" button and its feedback display into the `NewAgent` page footer for better UX flow ## Verification - `pnpm test` — all existing and new tests pass - `pnpm typecheck` — clean - Manual: navigate to Company Settings, confirm "Environments" appears as a top-level section - Manual: configure an agent with a non-local environment, click "Test Environment", confirm the test runs inside that environment ## Risks - Low risk. UI-only routing change for the settings page. 
The test-in-environment change adds an optional parameter with a local fallback, so existing behavior is preserved when no environment is specified. ## Model Used Codex GPT 5.4 high via Paperclip. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-04-29 15:56:13 -07:00
const hostOpenAiKey = targetIsRemote ? undefined : process.env.OPENAI_API_KEY;
if (isNonEmpty(configOpenAiKey) || isNonEmpty(hostOpenAiKey)) {
const source = isNonEmpty(configOpenAiKey) ? "adapter config env" : "server environment";
checks.push({
code: "codex_openai_api_key_present",
level: "info",
message: "OPENAI_API_KEY is set for Codex authentication.",
detail: `Detected in ${source}.`,
});
Add dedicated environment settings page and test-in-environment (#4798) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Agents run inside environments (local, SSH, E2B sandbox) > - Operators need to configure and manage these environments > - But environment settings were buried inside the general company settings page, making them hard to find > - Additionally, when testing an agent from the configuration form, the test always ran locally regardless of which environment was selected > - This PR moves environments into a dedicated top-level company settings section and wires the "Test Environment" button to run inside the selected environment > - The benefit is operators can find and manage environments more easily, and the test button now validates the actual environment the agent will use ## What Changed - Added a dedicated `CompanyEnvironments` settings page with its own route and sidebar entry - Updated `CompanySettingsSidebar` and `CompanySettingsNav` to include the new environments section - Modified the agent test route (`POST /agents/:id/test`) to accept an optional `environmentId` parameter - Updated all adapter `test.ts` handlers to resolve and use the specified execution target environment - Added `resolveTestExecutionTarget` to `execution-target.ts` for remote environment test resolution with cwd fallback - Moved the "Test Environment" button and its feedback display into the `NewAgent` page footer for better UX flow ## Verification - `pnpm test` — all existing and new tests pass - `pnpm typecheck` — clean - Manual: navigate to Company Settings, confirm "Environments" appears as a top-level section - Manual: configure an agent with a non-local environment, click "Test Environment", confirm the test runs inside that environment ## Risks - Low risk. UI-only routing change for the settings page. 
The test-in-environment change adds an optional parameter with a local fallback, so existing behavior is preserved when no environment is specified. ## Model Used Codex GPT 5.4 high via Paperclip. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-04-29 15:56:13 -07:00
} else if (!targetIsRemote) {
// Local-only auth file check. On remote targets, the probe will surface
// any missing-auth errors directly from the remote `codex` invocation.
const codexHome = isNonEmpty(env.CODEX_HOME) ? env.CODEX_HOME : undefined;
const codexAuth = await readCodexAuthInfo(codexHome).catch(() => null);
if (codexAuth) {
checks.push({
code: "codex_native_auth_present",
level: "info",
message: "Codex is authenticated via its own auth configuration.",
detail: codexAuth.email ? `Logged in as ${codexAuth.email}.` : `Credentials found in ${path.join(codexHome ?? codexHomeDir(), "auth.json")}.`,
});
} else {
checks.push({
code: "codex_openai_api_key_missing",
level: "warn",
message: "OPENAI_API_KEY is not set. Codex runs may fail until authentication is configured.",
hint: "Set OPENAI_API_KEY in adapter env, shell environment, or run `codex auth` to log in.",
});
}
}
const canRunProbe =
checks.every((check) => check.code !== "codex_cwd_invalid" && check.code !== "codex_command_unresolvable");
if (canRunProbe) {
if (!commandLooksLike(command, "codex")) {
checks.push({
code: "codex_hello_probe_skipped_custom_command",
level: "info",
message: "Skipped hello probe because command is not `codex`.",
detail: command,
hint: "Use the `codex` CLI command to run the automatic login and installation probe.",
});
} else {
const execArgs = buildCodexExecArgs({ ...config, fastMode: false });
const args = execArgs.args;
if (execArgs.fastModeIgnoredReason) {
checks.push({
code: "codex_fast_mode_unsupported_model",
level: "warn",
message: execArgs.fastModeIgnoredReason,
[codex] Stabilize tests and local maintenance assets (#4423) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - A fast-moving control plane needs stable local tests and repeatable local maintenance tools so contributors can safely split and review work > - Several route suites needed stronger isolation, Codex manual model selection needed a faster-mode option, and local browser cleanup missed Playwright's headless shell binary > - Storybook static output also needed to be preserved as a generated review artifact from the working branch > - This pull request groups the test/local-dev maintenance pieces so they can be reviewed separately from product runtime changes > - The benefit is more predictable contributor verification and cleaner local maintenance without mixing these changes into feature PRs ## What Changed - Added stable Vitest runner support and serialized route/authz test isolation. - Fixed workspace runtime authz route mocks and stabilized Claude/company-import related assertions. - Allowed Codex fast mode for manually selected models. - Broadened the agent browser cleanup script to detect `chrome-headless-shell` as well as Chrome for Testing. - Preserved generated Storybook static output from the source branch. ## Verification - `pnpm exec vitest run src/__tests__/workspace-runtime-routes-authz.test.ts src/__tests__/claude-local-execute.test.ts --config vitest.config.ts` from `server/` passed: 2 files, 19 tests. - `pnpm exec vitest run src/server/codex-args.test.ts --config vitest.config.ts` from `packages/adapters/codex-local/` passed: 1 file, 3 tests. - `bash -n scripts/kill-agent-browsers.sh && scripts/kill-agent-browsers.sh --dry` passed; dry-run detected `chrome-headless-shell` processes without killing them. - `test -f ui/storybook-static/index.html && test -f ui/storybook-static/assets/forms-editors.stories-Dry7qwx2.js` passed. - `git diff --check public-gh/master..pap-2228-test-local-maintenance -- . 
':(exclude)ui/storybook-static'` passed. - `pnpm exec vitest run cli/src/__tests__/company-import-export-e2e.test.ts --config cli/vitest.config.ts` did not complete in the isolated split worktree because `paperclipai run` exited during build prep with `TS2688: Cannot find type definition file for 'react'`; this appears to be caused by the worktree dependency symlink setup, not the code under test. - Confirmed this PR does not include `pnpm-lock.yaml`. ## Risks - Medium risk: the stable Vitest runner changes how route/authz tests are scheduled. - Generated `ui/storybook-static` files are large and contain minified third-party output; `git diff --check` reports whitespace inside those generated assets, so reviewers may choose to drop or regenerate that artifact before merge. - No database migrations. > For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`. ## Model Used - OpenAI Codex coding agent based on GPT-5, with shell, git, Paperclip API, and GitHub CLI tool use in the local Paperclip workspace. 
## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge Note: screenshot checklist item is not applicable to source UI behavior; the included Storybook static output is generated artifact preservation from the source branch. --------- Co-authored-by: Paperclip <noreply@paperclip.ing>
2026-04-24 15:11:42 -05:00
hint: "Switch the agent model to GPT-5.4 or enter a manual model ID to enable Codex Fast mode.",
});
}
Write apikey-mode auth.json so Codex CLI 0.122+ can authenticate via OPENAI_API_KEY (#5276) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - The Codex adapter spawns the OpenAI Codex CLI to drive the model > - Codex CLI 0.122 changed how it reads credentials: it ignores `OPENAI_API_KEY` from the environment and reads only `$CODEX_HOME/auth.json` > - Without auth.json, Codex 0.122+ returns 401 "Missing bearer or basic authentication" on `/v1/responses` even when `OPENAI_API_KEY` is forwarded into the sandbox or remote shell > - This pull request materializes an apikey-mode `auth.json` in the managed Codex home (or per-run for the test probe) when an `OPENAI_API_KEY` is configured > - The benefit is configured Codex API keys authenticate correctly with current Codex CLI versions across local, SSH, and sandbox targets ## What Changed - `codex-home.ts`: add `writeApiKeyAuthJson()` and let `prepareManagedCodexHome` accept an `apiKey` override that replaces the symlinked host auth.json with an apikey-mode file - `execute.ts`: pass `envConfig.OPENAI_API_KEY` into `prepareManagedCodexHome` so the managed (and synced-to-remote) Codex home authenticates via the configured key - `test.ts`: when `OPENAI_API_KEY` is available, wrap the hello probe with a small shell that materializes a per-run `$CODEX_HOME/auth.json` before exec'ing codex; key content rides through env to avoid leaking into process listings - Update the `codex_hello_probe_auth_required` hint to explain Codex CLI does not read `OPENAI_API_KEY` from env ## Verification - `pnpm vitest run --no-coverage --project @paperclipai/adapter-codex-local` - `pnpm typecheck` clean - Manual: Codex 0.122.0 with empty `CODEX_HOME` returns 401 with env-only auth; with this change it authenticates cleanly ## Risks Low risk — when no API key is configured, behavior is unchanged (no auth.json written, existing chatgpt-mode flow preserved). Apikey-mode `auth.json` is the upstream-supported format. 
## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-05-05 08:00:27 -07:00
// Codex CLI (>= 0.122) ignores the OPENAI_API_KEY env var and only reads
// credentials from $CODEX_HOME/auth.json. When we have a key available,
// wrap the probe with a shell that materializes a per-run auth.json so
// the CLI can authenticate. The key content is passed via env (not on
// the command line) to avoid leaking it into process listings.
const probeApiKey = isNonEmpty(configOpenAiKey)
? configOpenAiKey
: isNonEmpty(hostOpenAiKey)
? hostOpenAiKey
: null;
Stabilize runtime probes and Codex env tests (#5445) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Adapters expose a Test action that probes the configured runtime — install, resolvability, hello — to give operators a fast yes/no on whether an environment is healthy > - The Codex test path was running its hello probe directly without going through the managed-runtime preparation that production runs use, so a healthy production setup could still report a probe failure > - The plugin worker manager wasn't surfacing terminated workers cleanly, leaving the runtime probe waiting on a dead worker until the request timed out > - This pull request routes the Codex test probe through `prepareAdapterExecutionTargetRuntime` (so it sees the same managed Codex home production sees), exposes `commandCwd` on `createCommandManagedRuntimeClient` so callers can target a per-probe directory without leaking the workspace `remoteCwd`, and propagates plugin-worker termination as a usable error instead of a hang > - The benefit is the Codex Test action mirrors production behavior end-to-end, and probes against a terminated plugin worker fail fast instead of timing out ## What Changed - `packages/adapter-utils/src/command-managed-runtime.ts`: rename the `remoteCwd` knob to `commandCwd` so callers can target a per-probe directory without inheriting the workspace cwd; matching test coverage in `command-managed-runtime.test.ts` - `packages/adapter-utils/src/sandbox-callback-bridge.{ts,test.ts}`: small fixes to keep callback bridge stop semantics deterministic - `packages/adapters/codex-local/src/server/test.ts`: thread the Codex hello probe through `prepareAdapterExecutionTargetRuntime` + `prepareManagedCodexHome` so the probe sees the same managed home production sees; new `test.remote.test.ts` covers the remote probe path - `packages/adapters/cursor-local/src/server/execute.ts`: small probe-side cleanup that aligns with the new commandCwd contract - 
`server/src/services/plugin-worker-manager.ts`: surface plugin-worker termination as a structured error so callers fail fast; new `plugin-worker-terminated.cjs` fixture and `plugin-worker-manager.test.ts` cases pin the behavior ## Verification - `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils --project @paperclipai/adapter-codex-local --project @paperclipai/adapter-cursor-local --project @paperclipai/server` — 1749/1750 passing (1 unrelated skip) - `pnpm typecheck` clean ## Risks Low–medium. The `remoteCwd → commandCwd` rename is a parameter renaming on an internal helper used only by adapter test/execute paths in this repo. The plugin-worker-terminated path was previously a hang; failing fast may surface latent timeouts as explicit termination errors in callers that already expected them. ## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable — new tests cover commandCwd, plugin-worker termination, and Codex remote test path - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --- > **Stacked PR.** Sits on top of #5444 which adds the per-run runtime API surface this PR builds on. Cumulative diff against `master` includes that PR's content; the files touched by *this* PR's commit are listed under "What Changed" above. Will rebase onto `master` and force-push once #5444 merges.
2026-05-07 14:52:31 -07:00
const preparedProbe = await prepareCodexHelloProbe({
Add dedicated environment settings page and test-in-environment (#4798) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Agents run inside environments (local, SSH, E2B sandbox) > - Operators need to configure and manage these environments > - But environment settings were buried inside the general company settings page, making them hard to find > - Additionally, when testing an agent from the configuration form, the test always ran locally regardless of which environment was selected > - This PR moves environments into a dedicated top-level company settings section and wires the "Test Environment" button to run inside the selected environment > - The benefit is operators can find and manage environments more easily, and the test button now validates the actual environment the agent will use ## What Changed - Added a dedicated `CompanyEnvironments` settings page with its own route and sidebar entry - Updated `CompanySettingsSidebar` and `CompanySettingsNav` to include the new environments section - Modified the agent test route (`POST /agents/:id/test`) to accept an optional `environmentId` parameter - Updated all adapter `test.ts` handlers to resolve and use the specified execution target environment - Added `resolveTestExecutionTarget` to `execution-target.ts` for remote environment test resolution with cwd fallback - Moved the "Test Environment" button and its feedback display into the `NewAgent` page footer for better UX flow ## Verification - `pnpm test` — all existing and new tests pass - `pnpm typecheck` — clean - Manual: navigate to Company Settings, confirm "Environments" appears as a top-level section - Manual: configure an agent with a non-local environment, click "Test Environment", confirm the test runs inside that environment ## Risks - Low risk. UI-only routing change for the settings page. 
The test-in-environment change adds an optional parameter with a local fallback, so existing behavior is preserved when no environment is specified. ## Model Used Codex GPT 5.4 high via Paperclip. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-04-29 15:56:13 -07:00
runId,
Stabilize runtime probes and Codex env tests (#5445) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Adapters expose a Test action that probes the configured runtime — install, resolvability, hello — to give operators a fast yes/no on whether an environment is healthy > - The Codex test path was running its hello probe directly without going through the managed-runtime preparation that production runs use, so a healthy production setup could still report a probe failure > - The plugin worker manager wasn't surfacing terminated workers cleanly, leaving the runtime probe waiting on a dead worker until the request timed out > - This pull request routes the Codex test probe through `prepareAdapterExecutionTargetRuntime` (so it sees the same managed Codex home production sees), exposes `commandCwd` on `createCommandManagedRuntimeClient` so callers can target a per-probe directory without leaking the workspace `remoteCwd`, and propagates plugin-worker termination as a usable error instead of a hang > - The benefit is the Codex Test action mirrors production behavior end-to-end, and probes against a terminated plugin worker fail fast instead of timing out ## What Changed - `packages/adapter-utils/src/command-managed-runtime.ts`: rename the `remoteCwd` knob to `commandCwd` so callers can target a per-probe directory without inheriting the workspace cwd; matching test coverage in `command-managed-runtime.test.ts` - `packages/adapter-utils/src/sandbox-callback-bridge.{ts,test.ts}`: small fixes to keep callback bridge stop semantics deterministic - `packages/adapters/codex-local/src/server/test.ts`: thread the Codex hello probe through `prepareAdapterExecutionTargetRuntime` + `prepareManagedCodexHome` so the probe sees the same managed home production sees; new `test.remote.test.ts` covers the remote probe path - `packages/adapters/cursor-local/src/server/execute.ts`: small probe-side cleanup that aligns with the new commandCwd contract - 
`server/src/services/plugin-worker-manager.ts`: surface plugin-worker termination as a structured error so callers fail fast; new `plugin-worker-terminated.cjs` fixture and `plugin-worker-manager.test.ts` cases pin the behavior ## Verification - `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils --project @paperclipai/adapter-codex-local --project @paperclipai/adapter-cursor-local --project @paperclipai/server` — 1749/1750 passing (1 unrelated skip) - `pnpm typecheck` clean ## Risks Low–medium. The `remoteCwd → commandCwd` rename is a parameter renaming on an internal helper used only by adapter test/execute paths in this repo. The plugin-worker-terminated path was previously a hang; failing fast may surface latent timeouts as explicit termination errors in callers that already expected them. ## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable — new tests cover commandCwd, plugin-worker termination, and Codex remote test path - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --- > **Stacked PR.** Sits on top of #5444 which adds the per-run runtime API surface this PR builds on. Cumulative diff against `master` includes that PR's content; the files touched by *this* PR's commit are listed under "What Changed" above. Will rebase onto `master` and force-push once #5444 merges.
2026-05-07 14:52:31 -07:00
companyId: ctx.companyId,
Add dedicated environment settings page and test-in-environment (#4798) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Agents run inside environments (local, SSH, E2B sandbox) > - Operators need to configure and manage these environments > - But environment settings were buried inside the general company settings page, making them hard to find > - Additionally, when testing an agent from the configuration form, the test always ran locally regardless of which environment was selected > - This PR moves environments into a dedicated top-level company settings section and wires the "Test Environment" button to run inside the selected environment > - The benefit is operators can find and manage environments more easily, and the test button now validates the actual environment the agent will use ## What Changed - Added a dedicated `CompanyEnvironments` settings page with its own route and sidebar entry - Updated `CompanySettingsSidebar` and `CompanySettingsNav` to include the new environments section - Modified the agent test route (`POST /agents/:id/test`) to accept an optional `environmentId` parameter - Updated all adapter `test.ts` handlers to resolve and use the specified execution target environment - Added `resolveTestExecutionTarget` to `execution-target.ts` for remote environment test resolution with cwd fallback - Moved the "Test Environment" button and its feedback display into the `NewAgent` page footer for better UX flow ## Verification - `pnpm test` — all existing and new tests pass - `pnpm typecheck` — clean - Manual: navigate to Company Settings, confirm "Environments" appears as a top-level section - Manual: configure an agent with a non-local environment, click "Test Environment", confirm the test runs inside that environment ## Risks - Low risk. UI-only routing change for the settings page. 
The test-in-environment change adds an optional parameter with a local fallback, so existing behavior is preserved when no environment is specified. ## Model Used Codex GPT 5.4 high via Paperclip. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
2026-04-29 15:56:13 -07:00
target,
Stabilize runtime probes and Codex env tests (#5445) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Adapters expose a Test action that probes the configured runtime — install, resolvability, hello — to give operators a fast yes/no on whether an environment is healthy > - The Codex test path was running its hello probe directly without going through the managed-runtime preparation that production runs use, so a healthy production setup could still report a probe failure > - The plugin worker manager wasn't surfacing terminated workers cleanly, leaving the runtime probe waiting on a dead worker until the request timed out > - This pull request routes the Codex test probe through `prepareAdapterExecutionTargetRuntime` (so it sees the same managed Codex home production sees), exposes `commandCwd` on `createCommandManagedRuntimeClient` so callers can target a per-probe directory without leaking the workspace `remoteCwd`, and propagates plugin-worker termination as a usable error instead of a hang > - The benefit is the Codex Test action mirrors production behavior end-to-end, and probes against a terminated plugin worker fail fast instead of timing out ## What Changed - `packages/adapter-utils/src/command-managed-runtime.ts`: rename the `remoteCwd` knob to `commandCwd` so callers can target a per-probe directory without inheriting the workspace cwd; matching test coverage in `command-managed-runtime.test.ts` - `packages/adapter-utils/src/sandbox-callback-bridge.{ts,test.ts}`: small fixes to keep callback bridge stop semantics deterministic - `packages/adapters/codex-local/src/server/test.ts`: thread the Codex hello probe through `prepareAdapterExecutionTargetRuntime` + `prepareManagedCodexHome` so the probe sees the same managed home production sees; new `test.remote.test.ts` covers the remote probe path - `packages/adapters/cursor-local/src/server/execute.ts`: small probe-side cleanup that aligns with the new commandCwd contract - 
`server/src/services/plugin-worker-manager.ts`: surface plugin-worker termination as a structured error so callers fail fast; new `plugin-worker-terminated.cjs` fixture and `plugin-worker-manager.test.ts` cases pin the behavior ## Verification - `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils --project @paperclipai/adapter-codex-local --project @paperclipai/adapter-cursor-local --project @paperclipai/server` — 1749/1750 passing (1 unrelated skip) - `pnpm typecheck` clean ## Risks Low–medium. The `remoteCwd → commandCwd` rename is a parameter renaming on an internal helper used only by adapter test/execute paths in this repo. The plugin-worker-terminated path was previously a hang; failing fast may surface latent timeouts as explicit termination errors in callers that already expected them. ## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable — new tests cover commandCwd, plugin-worker termination, and Codex remote test path - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --- > **Stacked PR.** Sits on top of #5444 which adds the per-run runtime API surface this PR builds on. Cumulative diff against `master` includes that PR's content; the files touched by *this* PR's commit are listed under "What Changed" above. Will rebase onto `master` and force-push once #5444 merges.
2026-05-07 14:52:31 -07:00
targetIsRemote,
cwd,
command,
args,
env,
probeApiKey,
});
try {
const probe = await runAdapterExecutionTargetProcess(
runId,
target,
preparedProbe.command,
preparedProbe.args,
{
cwd,
env: preparedProbe.env,
timeoutSec: 45,
graceSec: 5,
stdin: "Respond with hello.",
onLog: async () => {},
},
);
const parsed = parseCodexJsonl(probe.stdout);
const detail = summarizeProbeDetail(probe.stdout, probe.stderr, parsed.errorMessage);
const authEvidence = `${parsed.errorMessage ?? ""}\n${probe.stdout}\n${probe.stderr}`.trim();
Stabilize runtime probes and Codex env tests (#5445) ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Adapters expose a Test action that probes the configured runtime — install, resolvability, hello — to give operators a fast yes/no on whether an environment is healthy > - The Codex test path was running its hello probe directly without going through the managed-runtime preparation that production runs use, so a healthy production setup could still report a probe failure > - The plugin worker manager wasn't surfacing terminated workers cleanly, leaving the runtime probe waiting on a dead worker until the request timed out > - This pull request routes the Codex test probe through `prepareAdapterExecutionTargetRuntime` (so it sees the same managed Codex home production sees), exposes `commandCwd` on `createCommandManagedRuntimeClient` so callers can target a per-probe directory without leaking the workspace `remoteCwd`, and propagates plugin-worker termination as a usable error instead of a hang > - The benefit is the Codex Test action mirrors production behavior end-to-end, and probes against a terminated plugin worker fail fast instead of timing out ## What Changed - `packages/adapter-utils/src/command-managed-runtime.ts`: rename the `remoteCwd` knob to `commandCwd` so callers can target a per-probe directory without inheriting the workspace cwd; matching test coverage in `command-managed-runtime.test.ts` - `packages/adapter-utils/src/sandbox-callback-bridge.{ts,test.ts}`: small fixes to keep callback bridge stop semantics deterministic - `packages/adapters/codex-local/src/server/test.ts`: thread the Codex hello probe through `prepareAdapterExecutionTargetRuntime` + `prepareManagedCodexHome` so the probe sees the same managed home production sees; new `test.remote.test.ts` covers the remote probe path - `packages/adapters/cursor-local/src/server/execute.ts`: small probe-side cleanup that aligns with the new commandCwd contract - 
`server/src/services/plugin-worker-manager.ts`: surface plugin-worker termination as a structured error so callers fail fast; new `plugin-worker-terminated.cjs` fixture and `plugin-worker-manager.test.ts` cases pin the behavior ## Verification - `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils --project @paperclipai/adapter-codex-local --project @paperclipai/adapter-cursor-local --project @paperclipai/server` — 1749/1750 passing (1 unrelated skip) - `pnpm typecheck` clean ## Risks Low–medium. The `remoteCwd → commandCwd` rename is a parameter renaming on an internal helper used only by adapter test/execute paths in this repo. The plugin-worker-terminated path was previously a hang; failing fast may surface latent timeouts as explicit termination errors in callers that already expected them. ## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable — new tests cover commandCwd, plugin-worker termination, and Codex remote test path - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --- > **Stacked PR.** Sits on top of #5444 which adds the per-run runtime API surface this PR builds on. Cumulative diff against `master` includes that PR's content; the files touched by *this* PR's commit are listed under "What Changed" above. Will rebase onto `master` and force-push once #5444 merges.
2026-05-07 14:52:31 -07:00
if (probe.timedOut) {
checks.push({
code: "codex_hello_probe_timed_out",
level: "warn",
message: "Codex hello probe timed out.",
hint: "Retry the probe. If this persists, verify Codex can run `Respond with hello` from this directory manually.",
});
} else if ((probe.exitCode ?? 1) === 0) {
const summary = parsed.summary.trim();
const hasHello = /\bhello\b/i.test(summary);
checks.push({
code: hasHello ? "codex_hello_probe_passed" : "codex_hello_probe_unexpected_output",
level: hasHello ? "info" : "warn",
message: hasHello
? "Codex hello probe succeeded."
: "Codex probe ran but did not return `hello` as expected.",
...(summary ? { detail: summary.replace(/\s+/g, " ").trim().slice(0, 240) } : {}),
...(hasHello
? {}
: {
hint: "Try the probe manually (`codex exec --json -` then prompt: Respond with hello) to inspect full output.",
}),
});
} else if (CODEX_AUTH_REQUIRED_RE.test(authEvidence)) {
checks.push({
code: "codex_hello_probe_auth_required",
level: "warn",
message: "Codex CLI is installed, but authentication is not ready.",
...(detail ? { detail } : {}),
hint: probeApiKey
? "OPENAI_API_KEY was provided but Codex still rejected the request. Verify the key is valid for the OpenAI Responses API (e.g. `curl -H \"Authorization: Bearer $OPENAI_API_KEY\" https://api.openai.com/v1/models`), or run `codex login` and seed `~/.codex/auth.json`."
: "Codex CLI does not read OPENAI_API_KEY from the environment; set OPENAI_API_KEY in this adapter's config (so Paperclip writes it to `$CODEX_HOME/auth.json`) or run `codex login` on the host first.",
});
} else {
checks.push({
code: "codex_hello_probe_failed",
level: "error",
message: "Codex hello probe failed.",
...(detail ? { detail } : {}),
hint: "Run `codex exec --json -` manually in this working directory and prompt `Respond with hello` to debug.",
});
}
} finally {
await preparedProbe.cleanup();
}
}
}
return {
adapterType: ctx.adapterType,
status: summarizeStatus(checks),
checks,
testedAt: new Date().toISOString(),
};
}