fix: harden heartbeat and adapter runtime workflows

This commit is contained in:
Dotta 2026-04-10 22:26:21 -05:00
parent 548721248e
commit c566a9236c
48 changed files with 14922 additions and 600 deletions

View file

@ -2,6 +2,24 @@ import { randomUUID } from "node:crypto";
import { describe, expect, it } from "vitest";
import { runChildProcess } from "./server-utils.js";
/**
 * Reports whether a process with the given PID currently exists.
 *
 * Probes with signal 0, which performs the existence and permission checks
 * without actually delivering a signal.
 *
 * @param pid - Process ID to probe.
 * @returns true when the process exists, false otherwise.
 */
function isPidAlive(pid: number) {
  try {
    process.kill(pid, 0);
    return true;
  } catch (err: unknown) {
    // EPERM means the process exists but we lack permission to signal it,
    // so it must still be counted as alive. Anything else (e.g. ESRCH) is "gone".
    return typeof err === "object" && err !== null && (err as { code?: unknown }).code === "EPERM";
  }
}
/**
 * Polls until the process `pid` has exited or `timeoutMs` has elapsed.
 *
 * @param pid - Process ID to watch.
 * @param timeoutMs - Maximum time to keep polling, in milliseconds.
 * @returns true if the process was observed gone, false if it was still alive at the deadline.
 */
async function waitForPidExit(pid: number, timeoutMs = 2_000) {
  const pollIntervalMs = 50;
  const giveUpAt = Date.now() + timeoutMs;
  const pause = () => new Promise((resolve) => setTimeout(resolve, pollIntervalMs));

  for (;;) {
    if (Date.now() >= giveUpAt) {
      // One last probe so an exit during the final sleep still counts.
      return !isPidAlive(pid);
    }
    if (!isPidAlive(pid)) {
      return true;
    }
    await pause();
  }
}
describe("runChildProcess", () => {
it("waits for onSpawn before sending stdin to the child", async () => {
const spawnDelayMs = 150;
@ -35,4 +53,36 @@ describe("runChildProcess", () => {
expect(onSpawnCompletedAt).toBeGreaterThanOrEqual(startedAt + spawnDelayMs);
expect(finishedAt - startedAt).toBeGreaterThanOrEqual(spawnDelayMs);
});
// Spawns a Node child that itself spawns a long-lived grandchild, lets the run
// time out, and verifies the grandchild is gone afterwards. Skipped on Windows.
it.skipIf(process.platform === "win32")("kills descendant processes on timeout via the process group", async () => {
  // Script run by the direct child: start a grandchild, print its PID, then idle forever.
  const childScript = [
    "const { spawn } = require('node:child_process');",
    "const child = spawn(process.execPath, ['-e', 'setInterval(() => {}, 1000)'], { stdio: 'ignore' });",
    "process.stdout.write(String(child.pid));",
    "setInterval(() => {}, 1000);",
  ].join(" ");

  const runOptions = {
    cwd: process.cwd(),
    env: {},
    timeoutSec: 1,
    graceSec: 1,
    onLog: async () => {},
    onSpawn: async () => {},
  };

  const result = await runChildProcess(randomUUID(), process.execPath, ["-e", childScript], runOptions);

  // The only thing the child writes to stdout is the grandchild's PID.
  const descendantPid = Number.parseInt(result.stdout.trim(), 10);

  expect(result.timedOut).toBe(true);
  expect(Number.isInteger(descendantPid) && descendantPid > 0).toBe(true);
  expect(await waitForPidExit(descendantPid, 2_000)).toBe(true);
});
});

View file

@ -19,6 +19,7 @@ export interface RunProcessResult {
/** Bookkeeping record for a spawned child process, as stored in `runningProcesses`. */
interface RunningProcess {
/** Handle of the spawned child process. */
child: ChildProcess;
/**
 * Grace period in seconds, copied from the run options. runChildProcess waits
 * this long (at least 1s) between SIGTERM and SIGKILL on timeout; other
 * consumers of this stored value are not visible here — confirm before relying on it.
 */
graceSec: number;
/**
 * Process group ID used to signal the child together with its descendants.
 * Null on Windows or when the child has no usable PID.
 */
processGroupId: number | null;
}
interface SpawnTarget {
@ -34,6 +35,28 @@ type ChildProcessWithEvents = ChildProcess & {
): ChildProcess;
};
/**
 * Derives the process group ID to use when signaling a spawned child.
 *
 * The child's own PID is used as the group ID. Returns null on Windows and
 * whenever the child does not carry a positive numeric PID.
 *
 * @param child - The spawned child process.
 * @returns The process group ID, or null when none is available.
 */
function resolveProcessGroupId(child: ChildProcess) {
  if (process.platform === "win32") {
    return null;
  }
  const { pid } = child;
  // `!(pid > 0)` rather than `pid <= 0` so NaN is rejected as well.
  if (typeof pid !== "number" || !(pid > 0)) {
    return null;
  }
  return pid;
}
/**
 * Delivers `signal` to a tracked child process, preferring its whole process group.
 *
 * On non-Windows platforms with a known process group ID, the signal is sent to
 * the group so descendants of the child receive it too. If that fails, or no
 * group is available, the signal is sent to the direct child instead.
 *
 * @param running - The child handle and its process group ID (null when unknown).
 * @param signal - The signal to deliver, e.g. "SIGTERM" or "SIGKILL".
 */
function signalRunningProcess(
  running: Pick<RunningProcess, "child" | "processGroupId">,
  signal: NodeJS.Signals,
) {
  const { child, processGroupId } = running;
  if (process.platform !== "win32" && processGroupId !== null && processGroupId > 0) {
    try {
      // A negative PID targets the process group rather than a single process.
      process.kill(-processGroupId, signal);
      return;
    } catch {
      // Fall back to the direct child signal if group signaling fails.
    }
  }
  // `child.killed` only records that an earlier kill() call delivered a signal,
  // not that the child has exited. Guarding on it would suppress a follow-up
  // SIGKILL after a fallback SIGTERM, so check the actual exit state instead.
  if (child.exitCode === null && child.signalCode === null) {
    child.kill(signal);
  }
}
/** Registry of tracked child processes, keyed by run ID; runChildProcess registers each spawned child here. */
export const runningProcesses = new Map<string, RunningProcess>();
/** 4 MiB byte limit; presumably caps captured process output — confirm at the usage sites. */
export const MAX_CAPTURE_BYTES = 4 * 1024 * 1024;
/** 32 KiB byte limit; presumably caps output excerpts — confirm at the usage sites. */
export const MAX_EXCERPT_BYTES = 32 * 1024;
@ -1034,7 +1057,7 @@ export async function runChildProcess(
graceSec: number;
onLog: (stream: "stdout" | "stderr", chunk: string) => Promise<void>;
onLogError?: (err: unknown, runId: string, message: string) => void;
onSpawn?: (meta: { pid: number; startedAt: string }) => Promise<void>;
onSpawn?: (meta: { pid: number; processGroupId: number | null; startedAt: string }) => Promise<void>;
stdin?: string;
},
): Promise<RunProcessResult> {
@ -1064,19 +1087,21 @@ export async function runChildProcess(
const child = spawn(target.command, target.args, {
cwd: opts.cwd,
env: mergedEnv,
detached: process.platform !== "win32",
shell: false,
stdio: [opts.stdin != null ? "pipe" : "ignore", "pipe", "pipe"],
}) as ChildProcessWithEvents;
const startedAt = new Date().toISOString();
const processGroupId = resolveProcessGroupId(child);
const spawnPersistPromise =
typeof child.pid === "number" && child.pid > 0 && opts.onSpawn
? opts.onSpawn({ pid: child.pid, startedAt }).catch((err) => {
? opts.onSpawn({ pid: child.pid, processGroupId, startedAt }).catch((err) => {
onLogError(err, runId, "failed to record child process metadata");
})
: Promise.resolve();
runningProcesses.set(runId, { child, graceSec: opts.graceSec });
runningProcesses.set(runId, { child, graceSec: opts.graceSec, processGroupId });
let timedOut = false;
let stdout = "";
@ -1087,11 +1112,9 @@ export async function runChildProcess(
opts.timeoutSec > 0
? setTimeout(() => {
timedOut = true;
child.kill("SIGTERM");
signalRunningProcess({ child, processGroupId }, "SIGTERM");
setTimeout(() => {
if (!child.killed) {
child.kill("SIGKILL");
}
signalRunningProcess({ child, processGroupId }, "SIGKILL");
}, Math.max(1, opts.graceSec) * 1000);
}, opts.timeoutSec * 1000)
: null;

View file

@ -120,7 +120,7 @@ export interface AdapterExecutionContext {
context: Record<string, unknown>;
onLog: (stream: "stdout" | "stderr", chunk: string) => Promise<void>;
onMeta?: (meta: AdapterInvocationMeta) => Promise<void>;
onSpawn?: (meta: { pid: number; startedAt: string }) => Promise<void>;
onSpawn?: (meta: { pid: number; processGroupId: number | null; startedAt: string }) => Promise<void>;
authToken?: string;
}