mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-14 01:50:39 +09:00
Stabilize runtime probes and Codex env tests (#5445)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Adapters expose a Test action that probes the configured runtime —
install, resolvability, hello — to give operators a fast yes/no on
whether an environment is healthy
> - The Codex test path was running its hello probe directly without
going through the managed-runtime preparation that production runs use,
so a healthy production setup could still report a probe failure
> - The plugin worker manager wasn't surfacing terminated workers
cleanly, leaving the runtime probe waiting on a dead worker until the
request timed out
> - This pull request routes the Codex test probe through
`prepareAdapterExecutionTargetRuntime` (so it sees the same managed
Codex home production sees), exposes `commandCwd` on
`createCommandManagedRuntimeClient` so callers can target a per-probe
directory without leaking the workspace `remoteCwd`, and propagates
plugin-worker termination as a usable error instead of a hang
> - The benefit is the Codex Test action mirrors production behavior
end-to-end, and probes against a terminated plugin worker fail fast
instead of timing out
## What Changed
- `packages/adapter-utils/src/command-managed-runtime.ts`: rename the
`remoteCwd` knob to `commandCwd` so callers can target a per-probe
directory without inheriting the workspace cwd; matching test coverage
in `command-managed-runtime.test.ts`
- `packages/adapter-utils/src/sandbox-callback-bridge.{ts,test.ts}`:
small fixes to keep callback bridge stop semantics deterministic
- `packages/adapters/codex-local/src/server/test.ts`: thread the Codex
hello probe through `prepareAdapterExecutionTargetRuntime` +
`prepareManagedCodexHome` so the probe sees the same managed home
production sees; new `test.remote.test.ts` covers the remote probe path
- `packages/adapters/cursor-local/src/server/execute.ts`: small
probe-side cleanup that aligns with the new commandCwd contract
- `server/src/services/plugin-worker-manager.ts`: surface plugin-worker
termination as a structured error so callers fail fast; new
`plugin-worker-terminated.cjs` fixture and
`plugin-worker-manager.test.ts` cases pin the behavior
## Verification
- `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils
--project @paperclipai/adapter-codex-local --project
@paperclipai/adapter-cursor-local --project @paperclipai/server` —
1749/1750 passing (1 unrelated skip)
- `pnpm typecheck` clean
## Risks
Low–medium. The `remoteCwd → commandCwd` rename is a parameter renaming
on an internal helper used only by adapter test/execute paths in this
repo. The plugin-worker-terminated path was previously a hang; failing
fast may surface latent timeouts as explicit termination errors in
callers that already expected them.
## Model Used
Claude Opus 4.7 (1M context)
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable — new tests cover
commandCwd, plugin-worker termination, and Codex remote test path
- [x] If this change affects the UI, I have included before/after
screenshots — N/A (no UI)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
---
> **Stacked PR.** Sits on top of #5444 which adds the per-run runtime
API surface this PR builds on. Cumulative diff against `master` includes
that PR's content; the files touched by *this* PR's commit are listed
under "What Changed" above. Will rebase onto `master` and force-push
once #5444 merges.
This commit is contained in:
parent
12cb7b40fd
commit
fe3904f434
12 changed files with 639 additions and 90 deletions
|
|
@ -131,4 +131,90 @@ describe("command managed runtime", () => {
|
|||
.toMatchObject({ code: "ENOENT" });
|
||||
expect(calls.every((call) => call.stdin == null)).toBe(true);
|
||||
});
|
||||
|
||||
it("runs setup commands from the existing sandbox cwd when staging into a nested remote workspace dir", async () => {
|
||||
const rootDir = await mkdtemp(path.join(os.tmpdir(), "paperclip-command-runtime-nested-"));
|
||||
cleanupDirs.push(rootDir);
|
||||
|
||||
const localWorkspaceDir = path.join(rootDir, "local-workspace");
|
||||
const remoteBaseDir = path.join(rootDir, "remote-base");
|
||||
const remoteWorkspaceDir = path.join(remoteBaseDir, ".paperclip-runtime", "runs", "test", "workspace");
|
||||
await mkdir(localWorkspaceDir, { recursive: true });
|
||||
await mkdir(remoteBaseDir, { recursive: true });
|
||||
await writeFile(path.join(localWorkspaceDir, "README.md"), "local workspace\n", "utf8");
|
||||
|
||||
const calls: Array<{
|
||||
command: string;
|
||||
args?: string[];
|
||||
cwd?: string;
|
||||
env?: Record<string, string>;
|
||||
stdin?: string;
|
||||
timeoutMs?: number;
|
||||
}> = [];
|
||||
const runner = {
|
||||
execute: async (input: {
|
||||
command: string;
|
||||
args?: string[];
|
||||
cwd?: string;
|
||||
env?: Record<string, string>;
|
||||
stdin?: string;
|
||||
timeoutMs?: number;
|
||||
}): Promise<RunProcessResult> => {
|
||||
calls.push({ ...input });
|
||||
const startedAt = new Date().toISOString();
|
||||
try {
|
||||
const result = await execFile(input.command === "sh" ? "/bin/sh" : input.command, input.args ?? [], {
|
||||
cwd: input.cwd,
|
||||
env: {
|
||||
...process.env,
|
||||
...input.env,
|
||||
},
|
||||
maxBuffer: 32 * 1024 * 1024,
|
||||
timeout: input.timeoutMs,
|
||||
});
|
||||
return {
|
||||
exitCode: 0,
|
||||
signal: null,
|
||||
timedOut: false,
|
||||
stdout: result.stdout,
|
||||
stderr: result.stderr,
|
||||
pid: null,
|
||||
startedAt,
|
||||
};
|
||||
} catch (error) {
|
||||
const err = error as NodeJS.ErrnoException & {
|
||||
stdout?: string;
|
||||
stderr?: string;
|
||||
code?: string | number | null;
|
||||
signal?: NodeJS.Signals | null;
|
||||
killed?: boolean;
|
||||
};
|
||||
return {
|
||||
exitCode: typeof err.code === "number" ? err.code : null,
|
||||
signal: err.signal ?? null,
|
||||
timedOut: Boolean(err.killed && input.timeoutMs),
|
||||
stdout: err.stdout ?? "",
|
||||
stderr: err.stderr ?? "",
|
||||
pid: null,
|
||||
startedAt,
|
||||
};
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
await prepareCommandManagedRuntime({
|
||||
runner,
|
||||
spec: {
|
||||
remoteCwd: remoteBaseDir,
|
||||
timeoutMs: 30_000,
|
||||
},
|
||||
adapterKey: "codex",
|
||||
workspaceLocalDir: localWorkspaceDir,
|
||||
workspaceRemoteDir: remoteWorkspaceDir,
|
||||
});
|
||||
|
||||
expect(calls.length).toBeGreaterThan(0);
|
||||
expect(calls.every((call) => call.cwd === remoteBaseDir)).toBe(true);
|
||||
await expect(readFile(path.join(remoteWorkspaceDir, "README.md"), "utf8")).resolves.toBe("local workspace\n");
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ function requireSuccessfulResult(result: RunProcessResult, action: string): void
|
|||
|
||||
export function createCommandManagedRuntimeClient(input: {
|
||||
runner: CommandManagedRuntimeRunner;
|
||||
remoteCwd: string;
|
||||
commandCwd: string;
|
||||
timeoutMs: number;
|
||||
shellCommand?: "bash" | "sh" | null;
|
||||
}): SandboxManagedRuntimeClient {
|
||||
|
|
@ -66,7 +66,7 @@ export function createCommandManagedRuntimeClient(input: {
|
|||
const result = await input.runner.execute({
|
||||
command: shellCommand,
|
||||
args: ["-lc", script],
|
||||
cwd: input.remoteCwd,
|
||||
cwd: input.commandCwd,
|
||||
stdin: opts.stdin,
|
||||
timeoutMs: opts.timeoutMs ?? input.timeoutMs,
|
||||
});
|
||||
|
|
@ -117,7 +117,7 @@ export function createCommandManagedRuntimeClient(input: {
|
|||
const result = await input.runner.execute({
|
||||
command: shellCommand,
|
||||
args: ["-lc", `rm -rf ${shellQuote(remotePath)}`],
|
||||
cwd: input.remoteCwd,
|
||||
cwd: input.commandCwd,
|
||||
timeoutMs: input.timeoutMs,
|
||||
});
|
||||
requireSuccessfulResult(result, `remove ${remotePath}`);
|
||||
|
|
@ -126,7 +126,7 @@ export function createCommandManagedRuntimeClient(input: {
|
|||
const result = await input.runner.execute({
|
||||
command: shellCommand,
|
||||
args: ["-lc", command],
|
||||
cwd: input.remoteCwd,
|
||||
cwd: input.commandCwd,
|
||||
timeoutMs: options.timeoutMs,
|
||||
});
|
||||
requireSuccessfulResult(result, command);
|
||||
|
|
@ -149,6 +149,7 @@ export async function prepareCommandManagedRuntime(input: {
|
|||
}): Promise<PreparedSandboxManagedRuntime> {
|
||||
const timeoutMs = input.spec.timeoutMs && input.spec.timeoutMs > 0 ? input.spec.timeoutMs : 300_000;
|
||||
const workspaceRemoteDir = input.workspaceRemoteDir ?? input.spec.remoteCwd;
|
||||
const commandCwd = input.spec.remoteCwd;
|
||||
const runtimeSpec: SandboxRemoteExecutionSpec = {
|
||||
transport: "sandbox",
|
||||
provider: input.spec.providerKey ?? "sandbox",
|
||||
|
|
@ -159,7 +160,7 @@ export async function prepareCommandManagedRuntime(input: {
|
|||
};
|
||||
const client = createCommandManagedRuntimeClient({
|
||||
runner: input.runner,
|
||||
remoteCwd: workspaceRemoteDir,
|
||||
commandCwd,
|
||||
timeoutMs,
|
||||
shellCommand: input.spec.shellCommand,
|
||||
});
|
||||
|
|
@ -176,7 +177,7 @@ export async function prepareCommandManagedRuntime(input: {
|
|||
const probe = await input.runner.execute({
|
||||
command: shellCommand,
|
||||
args: ["-lc", `command -v ${shellQuote(detectCommand)} >/dev/null 2>&1`],
|
||||
cwd: workspaceRemoteDir,
|
||||
cwd: commandCwd,
|
||||
timeoutMs,
|
||||
});
|
||||
if (!probe.timedOut && (probe.exitCode ?? 1) === 0) {
|
||||
|
|
@ -195,7 +196,7 @@ export async function prepareCommandManagedRuntime(input: {
|
|||
const result = await input.runner.execute({
|
||||
command: shellCommand,
|
||||
args: ["-lc", installCommand],
|
||||
cwd: workspaceRemoteDir,
|
||||
cwd: commandCwd,
|
||||
timeoutMs,
|
||||
});
|
||||
// A failed install is not always fatal: the CLI may already be on PATH
|
||||
|
|
|
|||
|
|
@ -422,6 +422,53 @@ describe("sandbox callback bridge", () => {
|
|||
);
|
||||
});
|
||||
|
||||
it("handles SSH queue polling failures without emitting an unhandled rejection", async () => {
|
||||
const rootDir = await mkdtemp(path.join(os.tmpdir(), "paperclip-bridge-ssh-failure-"));
|
||||
cleanupDirs.push(rootDir);
|
||||
|
||||
const queueDir = path.posix.join(rootDir, "queue");
|
||||
const unhandled: unknown[] = [];
|
||||
const onUnhandledRejection = (reason: unknown) => {
|
||||
unhandled.push(reason);
|
||||
};
|
||||
process.on("unhandledRejection", onUnhandledRejection);
|
||||
|
||||
try {
|
||||
const worker = await startSandboxCallbackBridgeWorker({
|
||||
client: {
|
||||
makeDir: async () => {},
|
||||
listJsonFiles: async () => {
|
||||
throw new Error(
|
||||
"list /remote/.paperclip-runtime/gemini/paperclip-bridge/queue/requests failed with exit code 255: kex_exchange_identification: read: Connection reset by peer",
|
||||
);
|
||||
},
|
||||
readTextFile: async () => {
|
||||
throw new Error("unexpected readTextFile");
|
||||
},
|
||||
writeTextFile: async () => {
|
||||
throw new Error("unexpected writeTextFile");
|
||||
},
|
||||
rename: async () => {
|
||||
throw new Error("unexpected rename");
|
||||
},
|
||||
remove: async () => {},
|
||||
},
|
||||
queueDir,
|
||||
authorizeRequest: async () => null,
|
||||
handleRequest: async () => ({
|
||||
status: 200,
|
||||
body: "ok",
|
||||
}),
|
||||
});
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 50));
|
||||
await worker.stop();
|
||||
expect(unhandled).toEqual([]);
|
||||
} finally {
|
||||
process.off("unhandledRejection", onUnhandledRejection);
|
||||
}
|
||||
});
|
||||
|
||||
it("serializes remote response writes so stop does not recreate a late orphaned response", async () => {
|
||||
const rootDir = await mkdtemp(path.join(os.tmpdir(), "paperclip-bridge-response-lock-"));
|
||||
cleanupDirs.push(rootDir);
|
||||
|
|
|
|||
|
|
@ -610,6 +610,8 @@ export async function startSandboxCallbackBridgeWorker(input: {
|
|||
});
|
||||
const authorizeRequest = input.authorizeRequest ??
|
||||
((request: SandboxCallbackBridgeRequest) => authorizeSandboxCallbackBridgeRequestWithRoutes(request));
|
||||
const buildWorkerFailureMessage = (error: unknown) =>
|
||||
`Sandbox callback bridge worker failed: ${error instanceof Error ? error.message : String(error)}`;
|
||||
|
||||
const processRequestFile = async (fileName: string) => {
|
||||
const requestPath = path.posix.join(directories.requestsDir, fileName);
|
||||
|
|
@ -725,6 +727,16 @@ export async function startSandboxCallbackBridgeWorker(input: {
|
|||
break;
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
const message = buildWorkerFailureMessage(error);
|
||||
console.warn(`[paperclip] ${message}`);
|
||||
try {
|
||||
await failPendingRequests(message);
|
||||
} catch (failPendingError) {
|
||||
console.warn(
|
||||
`[paperclip] sandbox callback bridge failed to abort queued requests after worker failure: ${failPendingError instanceof Error ? failPendingError.message : String(failPendingError)}`,
|
||||
);
|
||||
}
|
||||
} finally {
|
||||
settled = true;
|
||||
if (settleResolve) {
|
||||
|
|
|
|||
|
|
@ -848,6 +848,26 @@ describe("rewriteWorkspaceCwdEnvVarsForExecution", () => {
|
|||
RANDOM_WORKSPACE_CWD_TOKEN: "/host/workspace",
|
||||
});
|
||||
});
|
||||
|
||||
it("only rewrites matching *_WORKSPACE_CWD string values", () => {
|
||||
const env = rewriteWorkspaceCwdEnvVarsForExecution({
|
||||
workspaceCwd: "/host/workspace",
|
||||
executionCwd: "/remote/workspace",
|
||||
executionTargetIsRemote: true,
|
||||
env: {
|
||||
MATCHING_WORKSPACE_CWD: "/host/workspace/.",
|
||||
DIFFERENT_WORKSPACE_CWD: "/host/other-workspace",
|
||||
BLANK_WORKSPACE_CWD: " ",
|
||||
NON_STRING_WORKSPACE_CWD: 42,
|
||||
},
|
||||
});
|
||||
|
||||
expect(env).toEqual({
|
||||
MATCHING_WORKSPACE_CWD: "/remote/workspace",
|
||||
DIFFERENT_WORKSPACE_CWD: "/host/other-workspace",
|
||||
BLANK_WORKSPACE_CWD: " ",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("refreshPaperclipWorkspaceEnvForExecution", () => {
|
||||
|
|
|
|||
|
|
@ -1012,8 +1012,13 @@ export function rewriteWorkspaceCwdEnvVarsForExecution(input: {
|
|||
const localWorkspaceCwd = typeof input.workspaceCwd === "string" && input.workspaceCwd.trim().length > 0
|
||||
? path.resolve(input.workspaceCwd)
|
||||
: null;
|
||||
// executionCwd is a remote path on the target host; we deliberately do not
|
||||
// run `path.resolve` against it because that applies host-Node semantics
|
||||
// (current working directory, host path separator) to a path that lives on
|
||||
// the remote shell. Callers always pass absolute remote paths, so we
|
||||
// forward the trimmed value verbatim.
|
||||
const remoteWorkspaceCwd = typeof input.executionCwd === "string" && input.executionCwd.trim().length > 0
|
||||
? path.resolve(input.executionCwd)
|
||||
? input.executionCwd.trim()
|
||||
: null;
|
||||
|
||||
if (!input.executionTargetIsRemote || !localWorkspaceCwd || !remoteWorkspaceCwd) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue