[codex] Harden heartbeat scheduling and runtime controls (#4223)

## Thinking Path

> - Paperclip orchestrates AI agents through issue checkout, heartbeat
runs, routines, and auditable control-plane state
> - The runtime path has to recover from lost local processes, transient
adapter failures, blocked dependencies, and routine coalescing without
stranding work
> - The existing branch carried several reliability fixes across
heartbeat scheduling, issue runtime controls, routine dispatch, and
operator-facing run state
> - These changes belong together because they share backend contracts,
migrations, and runtime status semantics
> - This pull request groups the control-plane/runtime slice so it can
merge independently from board UI polish and adapter sandbox work
> - The benefit is safer heartbeat recovery, clearer runtime controls,
and more predictable recurring execution behavior

## What Changed

- Adds bounded heartbeat retry scheduling, scheduled retry state, and
Codex transient failure recovery handling.
- Tightens heartbeat process recovery, blocker wake behavior, issue
comment wake handling, routine dispatch coalescing, and
activity/dashboard bounds.
- Adds runtime-control MCP tools and Paperclip skill docs for issue
workspace runtime management.
- Adds migrations `0061_lively_thor_girl.sql` and
`0062_routine_run_dispatch_fingerprint.sql`.
- Surfaces retry state in run ledger/agent UI and keeps related shared
types synchronized.

## Verification

- `pnpm exec vitest run
server/src/__tests__/heartbeat-retry-scheduling.test.ts
server/src/__tests__/heartbeat-process-recovery.test.ts
server/src/__tests__/routines-service.test.ts`
- `pnpm exec vitest run src/tools.test.ts` from `packages/mcp-server`

## Risks

- Medium risk: this touches heartbeat recovery and routine dispatch,
which are central execution paths.
- Migration order matters if split branches land out of order: merge
this PR before branches that assume the new runtime/routine fields.
- Runtime retry behavior should be watched in CI and in local operator
smoke tests because it changes how transient failures are resumed.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR, because feature PRs that
overlap with planned core work may need to be redirected. See
`CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, GPT-5-based coding agent runtime, shell/git tool use
enabled. Exact hosted model build and context window are not exposed in
this Paperclip heartbeat environment.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
This commit is contained in:
Dotta 2026-04-21 12:24:11 -05:00 committed by GitHub
parent ab9051b595
commit 09d0678840
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
61 changed files with 17622 additions and 456 deletions

View file

@ -221,16 +221,6 @@ describe("runChildProcess", () => {
});
});
describe("appendWithByteCap", () => {
  // The appended text contains a multibyte em dash and the cap is 7 bytes.
  // The result must contain no U+FFFD replacement character, must survive a
  // UTF-8 encode/decode round trip unchanged, and must not exceed the cap.
  it("keeps valid UTF-8 when trimming through multibyte text", () => {
    const byteCap = 7;
    const capped = appendWithByteCap("prefix ", "hello — world", byteCap);

    expect(capped).not.toContain("\uFFFD");

    const roundTripped = Buffer.from(capped, "utf8").toString("utf8");
    expect(roundTripped).toBe(capped);

    const encodedLength = Buffer.byteLength(capped, "utf8");
    expect(encodedLength).toBeLessThanOrEqual(byteCap);
  });
});
describe("renderPaperclipWakePrompt", () => {
it("keeps the default local-agent prompt action-oriented", () => {
expect(DEFAULT_PAPERCLIP_AGENT_PROMPT_TEMPLATE).toContain("Start actionable work in this heartbeat");
@ -266,6 +256,42 @@ describe("renderPaperclipWakePrompt", () => {
expect(prompt).toContain("mark blocked work with the unblock owner/action");
});
// A comment wake on an issue that still has an unresolved blocker should produce
// a prompt that flags the dependency-blocked interaction, scopes the run to
// responding/triaging, and lists the blocker as "<identifier> <title> (<status>)".
it("renders dependency-blocked interaction guidance", () => {
  const blockedIssue = {
    id: "issue-1",
    identifier: "PAP-1703",
    title: "Blocked parent",
    status: "todo",
  };
  const openBlocker = {
    id: "blocker-1",
    identifier: "PAP-1723",
    title: "Finish blocker",
    status: "todo",
    priority: "medium",
  };
  const wakePayload = {
    reason: "issue_commented",
    issue: blockedIssue,
    dependencyBlockedInteraction: true,
    unresolvedBlockerIssueIds: ["blocker-1"],
    unresolvedBlockerSummaries: [openBlocker],
    commentWindow: {
      requestedCount: 1,
      includedCount: 1,
      missingCount: 0,
    },
    commentIds: ["comment-1"],
    latestCommentId: "comment-1",
    comments: [{ id: "comment-1", body: "hello" }],
    fallbackFetchNeeded: false,
  };

  const rendered = renderPaperclipWakePrompt(wakePayload);

  // Checked in order; the first missing fragment fails the test.
  const expectedFragments = [
    "dependency-blocked interaction: yes",
    "respond or triage the human comment",
    "PAP-1723 Finish blocker (todo)",
  ];
  for (const fragment of expectedFragments) {
    expect(rendered).toContain(fragment);
  }
});
it("includes continuation and child issue summaries in structured wake context", () => {
const payload = {
reason: "issue_children_completed",
@ -335,3 +361,13 @@ describe("renderPaperclipWakePrompt", () => {
expect(prompt).toContain("Added the helper route and tests.");
});
});
describe("appendWithByteCap", () => {
  // The appended text contains a multibyte em dash and the cap is 7 bytes.
  // The result must contain no U+FFFD replacement character, must survive a
  // UTF-8 encode/decode round trip unchanged, and must not exceed the cap.
  it("keeps valid UTF-8 when trimming through multibyte text", () => {
    const byteCap = 7;
    const capped = appendWithByteCap("prefix ", "hello — world", byteCap);

    expect(capped).not.toContain("\uFFFD");

    const roundTripped = Buffer.from(capped, "utf8").toString("utf8");
    expect(roundTripped).toBe(capped);

    const encodedLength = Buffer.byteLength(capped, "utf8");
    expect(encodedLength).toBeLessThanOrEqual(byteCap);
  });
});

View file

@ -83,6 +83,7 @@ export const DEFAULT_PAPERCLIP_AGENT_PROMPT_TEMPLATE = [
"- Start actionable work in this heartbeat; do not stop at a plan unless the issue asks for planning.",
"- Leave durable progress in comments, documents, or work products with a clear next action.",
"- Use child issues for parallel or long delegated work instead of polling agents, sessions, or processes.",
"- If woken by a human comment on a dependency-blocked issue, respond or triage the comment without treating the blocked deliverable work as unblocked.",
"- If blocked, mark the issue blocked and name the unblock owner and action.",
"- Respect budget, pause/cancel, approval gates, and company boundaries.",
].join("\n");
@ -313,10 +314,21 @@ type PaperclipWakeChildIssueSummary = {
summary: string | null;
};
/**
 * Summary of one unresolved blocker issue carried in a wake payload.
 *
 * Every field is nullable: normalizePaperclipWakeBlockerSummary stores null for
 * any value that is missing or blank after trimming.
 */
type PaperclipWakeBlockerSummary = {
// Issue id of the blocker; the prompt renderer falls back to it when identifier is null.
id: string | null;
// Display identifier of the blocker (the spec uses values such as "PAP-1723").
identifier: string | null;
// Blocker issue title.
title: string | null;
// Blocker issue status (the spec uses "todo").
status: string | null;
// Blocker issue priority (the spec uses "medium").
priority: string | null;
};
type PaperclipWakePayload = {
reason: string | null;
issue: PaperclipWakeIssue | null;
checkedOutByHarness: boolean;
dependencyBlockedInteraction: boolean;
unresolvedBlockerIssueIds: string[];
unresolvedBlockerSummaries: PaperclipWakeBlockerSummary[];
executionStage: PaperclipWakeExecutionStage | null;
continuationSummary: PaperclipWakeContinuationSummary | null;
livenessContinuation: PaperclipWakeLivenessContinuation | null;
@ -409,6 +421,17 @@ function normalizePaperclipWakeChildIssueSummary(value: unknown): PaperclipWakeC
return { id, identifier, title, status, priority, summary };
}
/**
 * Normalizes one raw blocker entry from a wake payload.
 *
 * Each field is read as a string and trimmed; blank results become null.
 *
 * @param value - Untrusted blocker entry taken from the payload.
 * @returns The normalized summary, or null when id, identifier, title, and
 *   status are all blank (a priority on its own does not keep the entry).
 */
function normalizePaperclipWakeBlockerSummary(value: unknown): PaperclipWakeBlockerSummary | null {
  const raw = parseObject(value);

  // Read a field as trimmed text, collapsing the empty string to null.
  const readText = (field: unknown): string | null => {
    const text = asString(field, "").trim();
    return text === "" ? null : text;
  };

  const summary: PaperclipWakeBlockerSummary = {
    id: readText(raw.id),
    identifier: readText(raw.identifier),
    title: readText(raw.title),
    status: readText(raw.status),
    priority: readText(raw.priority),
  };

  const hasIdentifyingField =
    summary.id !== null || summary.identifier !== null || summary.title !== null || summary.status !== null;
  return hasIdentifyingField ? summary : null;
}
function normalizePaperclipWakeExecutionPrincipal(value: unknown): PaperclipWakeExecutionPrincipal | null {
const principal = parseObject(value);
const typeRaw = asString(principal.type, "").trim().toLowerCase();
@ -474,8 +497,18 @@ export function normalizePaperclipWakePayload(value: unknown): PaperclipWakePayl
.map((entry) => normalizePaperclipWakeChildIssueSummary(entry))
.filter((entry): entry is PaperclipWakeChildIssueSummary => Boolean(entry))
: [];
const unresolvedBlockerIssueIds = Array.isArray(payload.unresolvedBlockerIssueIds)
? payload.unresolvedBlockerIssueIds
.map((entry) => asString(entry, "").trim())
.filter(Boolean)
: [];
const unresolvedBlockerSummaries = Array.isArray(payload.unresolvedBlockerSummaries)
? payload.unresolvedBlockerSummaries
.map((entry) => normalizePaperclipWakeBlockerSummary(entry))
.filter((entry): entry is PaperclipWakeBlockerSummary => Boolean(entry))
: [];
if (comments.length === 0 && commentIds.length === 0 && childIssueSummaries.length === 0 && !executionStage && !continuationSummary && !livenessContinuation && !normalizePaperclipWakeIssue(payload.issue)) {
if (comments.length === 0 && commentIds.length === 0 && childIssueSummaries.length === 0 && unresolvedBlockerIssueIds.length === 0 && unresolvedBlockerSummaries.length === 0 && !executionStage && !continuationSummary && !livenessContinuation && !normalizePaperclipWakeIssue(payload.issue)) {
return null;
}
@ -483,6 +516,9 @@ export function normalizePaperclipWakePayload(value: unknown): PaperclipWakePayl
reason: asString(payload.reason, "").trim() || null,
issue: normalizePaperclipWakeIssue(payload.issue),
checkedOutByHarness: asBoolean(payload.checkedOutByHarness, false),
dependencyBlockedInteraction: asBoolean(payload.dependencyBlockedInteraction, false),
unresolvedBlockerIssueIds,
unresolvedBlockerSummaries,
executionStage,
continuationSummary,
livenessContinuation,
@ -563,6 +599,18 @@ export function renderPaperclipWakePrompt(
if (normalized.checkedOutByHarness) {
lines.push("- checkout: already claimed by the harness for this run");
}
if (normalized.dependencyBlockedInteraction) {
lines.push("- dependency-blocked interaction: yes");
lines.push("- execution scope: respond or triage the human comment; do not treat blocker-dependent deliverable work as unblocked");
if (normalized.unresolvedBlockerSummaries.length > 0) {
const blockers = normalized.unresolvedBlockerSummaries
.map((blocker) => `${blocker.identifier ?? blocker.id ?? "unknown"}${blocker.title ? ` ${blocker.title}` : ""}${blocker.status ? ` (${blocker.status})` : ""}`)
.join("; ");
lines.push(`- unresolved blockers: ${blockers}`);
} else if (normalized.unresolvedBlockerIssueIds.length > 0) {
lines.push(`- unresolved blocker issue ids: ${normalized.unresolvedBlockerIssueIds.join(", ")}`);
}
}
if (normalized.missingCount > 0) {
lines.push(`- omitted comments: ${normalized.missingCount}`);
}