[codex] Add runtime lifecycle recovery and live issue visibility (#4419)

This commit is contained in:
Dotta 2026-04-24 15:50:32 -05:00 committed by GitHub
parent 9a8d219949
commit 5a0c1979cf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
121 changed files with 9625 additions and 2044 deletions

View file

@ -81,6 +81,7 @@ import {
} from "../services/default-agent-instructions.js";
import { getTelemetryClient } from "../telemetry.js";
import { assertEnvironmentSelectionForCompany } from "./environment-selection.js";
import { recoveryService } from "../services/recovery/service.js";
// Default run-log byte limit (256,000 bytes).
// NOTE(review): presumably applied when a request supplies no explicit limit — usage is not visible here, confirm.
const RUN_LOG_DEFAULT_LIMIT_BYTES = 256_000;
// Upper bound for a run-log byte limit (1 MiB); readRunLogLimitBytes caps parsed values at this.
const RUN_LOG_MAX_LIMIT_BYTES = 1024 * 1024;
@ -91,6 +92,12 @@ function readRunLogLimitBytes(value: unknown) {
return Math.max(1, Math.min(RUN_LOG_MAX_LIMIT_BYTES, Math.trunc(parsed)));
}
/**
 * Parses a raw query-string value as a non-negative integer clamped to `[0, max]`.
 *
 * @param value - Raw query value of unknown shape (e.g. `req.query.limit`).
 * @param max - Inclusive upper bound applied to the parsed value.
 * @param fallback - Returned unchanged when `value` is absent, blank, or not a finite number.
 * @returns The truncated and clamped integer, or `fallback`.
 */
function readLiveRunsQueryInt(value: unknown, max: number, fallback = 0) {
  // `Number(null)` and `Number("")` both evaluate to 0, which would turn a missing or
  // blank parameter (e.g. `?limit=`) into an explicit 0 instead of the caller's fallback.
  if (value == null) return fallback;
  if (typeof value === "string" && value.trim() === "") return fallback;

  const parsed = Number(value);
  if (!Number.isFinite(parsed)) return fallback;

  // Drop any fractional part first, then clamp into [0, max].
  return Math.max(0, Math.min(max, Math.trunc(parsed)));
}
export function agentRoutes(
db: Db,
options: { pluginWorkerManager?: PluginWorkerManager } = {},
@ -142,6 +149,7 @@ export function agentRoutes(
const heartbeat = heartbeatService(db, {
pluginWorkerManager: options.pluginWorkerManager,
});
const recovery = recoveryService(db, { enqueueWakeup: heartbeat.wakeup });
const issueApprovalsSvc = issueApprovalService(db);
const secretsSvc = secretService(db);
const instructions = agentInstructionsService();
@ -2532,11 +2540,12 @@ export function agentRoutes(
const companyId = req.params.companyId as string;
assertCompanyAccess(req, companyId);
const minCountParam = req.query.minCount as string | undefined;
const minCount = minCountParam ? Math.max(0, Math.min(20, parseInt(minCountParam, 10) || 0)) : 0;
const minCount = readLiveRunsQueryInt(req.query.minCount, 50);
const limit = readLiveRunsQueryInt(req.query.limit, 50);
const columns = {
id: heartbeatRuns.id,
companyId: heartbeatRuns.companyId,
status: heartbeatRuns.status,
invocationSource: heartbeatRuns.invocationSource,
triggerDetail: heartbeatRuns.triggerDetail,
@ -2546,15 +2555,21 @@ export function agentRoutes(
agentId: heartbeatRuns.agentId,
agentName: agentsTable.name,
adapterType: agentsTable.adapterType,
logBytes: heartbeatRuns.logBytes,
livenessState: heartbeatRuns.livenessState,
livenessReason: heartbeatRuns.livenessReason,
continuationAttempt: heartbeatRuns.continuationAttempt,
lastUsefulActionAt: heartbeatRuns.lastUsefulActionAt,
nextAction: heartbeatRuns.nextAction,
lastOutputAt: heartbeatRuns.lastOutputAt,
lastOutputSeq: heartbeatRuns.lastOutputSeq,
lastOutputStream: heartbeatRuns.lastOutputStream,
lastOutputBytes: heartbeatRuns.lastOutputBytes,
processStartedAt: heartbeatRuns.processStartedAt,
issueId: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'issueId'`.as("issueId"),
};
const liveRuns = await db
const liveRunsQuery = db
.select(columns)
.from(heartbeatRuns)
.innerJoin(agentsTable, eq(heartbeatRuns.agentId, agentsTable.id))
@ -2566,7 +2581,10 @@ export function agentRoutes(
)
.orderBy(desc(heartbeatRuns.createdAt));
if (minCount > 0 && liveRuns.length < minCount) {
const liveRuns = limit > 0 ? await liveRunsQuery.limit(limit) : await liveRunsQuery;
const targetRunCount = limit > 0 ? Math.min(minCount, limit) : minCount;
if (targetRunCount > 0 && liveRuns.length < targetRunCount) {
const activeIds = liveRuns.map((r) => r.id);
const recentRuns = await db
.select(columns)
@ -2580,13 +2598,20 @@ export function agentRoutes(
),
)
.orderBy(desc(heartbeatRuns.createdAt))
.limit(minCount - liveRuns.length);
.limit(targetRunCount - liveRuns.length);
res.json([...liveRuns, ...recentRuns]);
const rows = [...liveRuns, ...recentRuns];
res.json(await Promise.all(rows.map(async (run) => ({
...run,
outputSilence: await heartbeat.buildRunOutputSilence(run),
}))));
return;
}
res.json(liveRuns);
res.json(await Promise.all(liveRuns.map(async (run) => ({
...run,
outputSilence: await heartbeat.buildRunOutputSilence(run),
}))));
});
router.get("/heartbeat-runs/:runId", async (req, res) => {
@ -2600,7 +2625,7 @@ export function agentRoutes(
const retryExhaustedReason = await heartbeat.getRetryExhaustedReason(runId);
res.json(
redactCurrentUserValue(
{ ...run, retryExhaustedReason },
{ ...run, retryExhaustedReason, outputSilence: await heartbeat.buildRunOutputSilence(run) },
await getCurrentUserRedactionOptions(),
),
);
@ -2630,6 +2655,42 @@ export function agentRoutes(
res.json(run);
});
// POST /heartbeat-runs/:runId/watchdog-decisions
// Validates the request body and records a watchdog decision for the given heartbeat run.
//   404 - heartbeat.getRun returned nothing for the run id
//   400 - decision is not one of the supported values, or a "snooze" lacks a future snoozedUntil
//   otherwise responds with whatever recovery.recordWatchdogDecision resolves to
router.post("/heartbeat-runs/:runId/watchdog-decisions", async (req, res) => {
  const run = await heartbeat.getRun(req.params.runId as string);
  if (!run) {
    res.status(404).json({ error: "Heartbeat run not found" });
    return;
  }
  // NOTE(review): presumably rejects callers outside the run's company — implementation not visible here.
  assertCompanyAccess(req, run.companyId);

  const payload = req.body;

  // Anything that is not exactly one of the supported strings (including non-strings) is rejected.
  const supportedDecisions = ["snooze", "continue", "dismissed_false_positive"] as const;
  const requestedDecision: unknown = payload?.decision;
  const decision = supportedDecisions.find((candidate) => candidate === requestedDecision);
  if (decision === undefined) {
    res.status(400).json({ error: "Unsupported watchdog decision" });
    return;
  }

  // Only a "snooze" carries a deadline; it must parse to a valid instant strictly in the future.
  let snoozedUntil: Date | null = null;
  if (decision === "snooze") {
    snoozedUntil = new Date(String(payload?.snoozedUntil ?? ""));
    const deadlineMs = snoozedUntil.getTime();
    if (Number.isNaN(deadlineMs) || deadlineMs <= Date.now()) {
      res.status(400).json({ error: "snoozedUntil must be a future ISO datetime" });
      return;
    }
  }

  const recorded = await recovery.recordWatchdogDecision({
    runId: run.id,
    actor: req.actor,
    decision,
    // Non-string values are normalised to null.
    evaluationIssueId: typeof payload?.evaluationIssueId === "string" ? payload.evaluationIssueId : null,
    // Free-text reason is capped at 4000 characters.
    reason: typeof payload?.reason === "string" ? payload.reason.slice(0, 4000) : null,
    snoozedUntil,
    createdByRunId: req.actor.runId ?? null,
  });
  res.json(recorded);
});
router.get("/heartbeat-runs/:runId/events", async (req, res) => {
const runId = req.params.runId as string;
const run = await heartbeat.getRun(runId);
@ -2730,11 +2791,17 @@ export function agentRoutes(
agentId: heartbeatRuns.agentId,
agentName: agentsTable.name,
adapterType: agentsTable.adapterType,
logBytes: heartbeatRuns.logBytes,
livenessState: heartbeatRuns.livenessState,
livenessReason: heartbeatRuns.livenessReason,
continuationAttempt: heartbeatRuns.continuationAttempt,
lastUsefulActionAt: heartbeatRuns.lastUsefulActionAt,
nextAction: heartbeatRuns.nextAction,
lastOutputAt: heartbeatRuns.lastOutputAt,
lastOutputSeq: heartbeatRuns.lastOutputSeq,
lastOutputStream: heartbeatRuns.lastOutputStream,
lastOutputBytes: heartbeatRuns.lastOutputBytes,
processStartedAt: heartbeatRuns.processStartedAt,
})
.from(heartbeatRuns)
.innerJoin(agentsTable, eq(heartbeatRuns.agentId, agentsTable.id))
@ -2747,7 +2814,10 @@ export function agentRoutes(
)
.orderBy(desc(heartbeatRuns.createdAt));
res.json(liveRuns);
res.json(await Promise.all(liveRuns.map(async (run) => ({
...run,
outputSilence: await heartbeat.buildRunOutputSilence({ ...run, companyId: issue.companyId }),
}))));
});
router.get("/issues/:issueId/active-run", async (req, res) => {
@ -2795,6 +2865,7 @@ export function agentRoutes(
agentId: agent.id,
agentName: agent.name,
adapterType: agent.adapterType,
outputSilence: await heartbeat.buildRunOutputSilence({ ...run, companyId: issue.companyId }),
});
});