[codex] Harden execution reliability and heartbeat tooling (#3679)

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - Reliable execution depends on heartbeat routing, issue lifecycle
semantics, telemetry, and a fast enough local verification loop to keep
regressions visible
> - The remaining commits on this branch were mostly server/runtime
correctness fixes plus test and documentation follow-ups in that area
> - Those changes are logically separate from the UI-focused
issue-detail and workspace/navigation branches even when they touch
overlapping issue APIs
> - This pull request groups the execution reliability, heartbeat,
telemetry, and tooling changes into one standalone branch
> - The benefit is a focused review of the control-plane correctness
work, including the follow-up fix that restored the implicit
comment-reopen helpers after branch splitting

## What Changed

- Hardened issue/heartbeat execution behavior, including self-review
stage skipping, deferred mention wakes during active execution, stranded
execution recovery, active-run scoping, assignee resolution, and
blocked-to-todo wake resumption
- Reduced noisy polling/logging overhead by trimming issue run payloads,
compacting persisted run logs, silencing high-volume request logs, and
capping heartbeat-run queries in dashboard/inbox surfaces
- Expanded telemetry and status semantics with adapter/model fields on
task completion plus clearer status guidance in docs/onboarding material
- Updated test infrastructure and verification defaults with faster
route-test module isolation, cheaper default `pnpm test`, e2e isolation
from local state, and repo verification follow-ups
- Included docs/release housekeeping from the branch and added a small
follow-up commit restoring the implicit comment-reopen helpers that were
dropped during branch reconstruction

## Verification

- `pnpm vitest run
server/src/__tests__/issue-comment-reopen-routes.test.ts
server/src/__tests__/issue-telemetry-routes.test.ts`
- `pnpm vitest run server/src/__tests__/http-log-policy.test.ts
server/src/__tests__/heartbeat-run-log.test.ts
server/src/__tests__/health.test.ts`
- `server/src/__tests__/activity-service.test.ts`,
`server/src/__tests__/heartbeat-comment-wake-batching.test.ts`, and
`server/src/__tests__/heartbeat-process-recovery.test.ts` were attempted
on this host, but the embedded Postgres harness reported
init-script/data-dir problems, so these suites were either skipped or
the harness failed to start; they are noted as environment-limited

## Risks

- Medium: this branch changes core issue/heartbeat routing and
reopen/wakeup behavior, so regressions would affect agent execution flow
rather than isolated UI polish
- Because it also updates verification infrastructure, reviewers should
pay attention to whether the new tests are asserting the right failure
modes and not just reshaping harness behavior

## Model Used

- OpenAI Codex coding agent (GPT-5-class runtime in Codex CLI; exact
deployed model ID is not exposed in this environment), reasoning
enabled, tool use and local code execution enabled

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [ ] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Dotta 2026-04-14 13:34:52 -05:00 committed by GitHub
parent e89076148a
commit 7f893ac4ec
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
106 changed files with 4682 additions and 713 deletions

View file

@ -95,6 +95,10 @@ async function waitFor(condition: () => boolean | Promise<boolean>, timeoutMs =
throw new Error("Timed out waiting for condition");
}
async function closeDbClient(db: ReturnType<typeof createDb> | undefined) {
await db?.$client?.end?.({ timeout: 0 });
}
async function createControlledGatewayServer() {
const server = createServer();
const wss = new WebSocketServer({ server });
@ -225,6 +229,7 @@ describe("heartbeat comment wake batching", () => {
}, 45_000);
afterAll(async () => {
await closeDbClient(db);
await instance?.stop();
if (dataDir) {
fs.rmSync(dataDir, { recursive: true, force: true });
@ -761,6 +766,169 @@ describe("heartbeat comment wake batching", () => {
}
}, 20_000);
it("defers mentioned-agent wakes while another agent is actively executing the same issue", async () => {
const gateway = await createControlledGatewayServer();
const companyId = randomUUID();
const primaryAgentId = randomUUID();
const mentionedAgentId = randomUUID();
const issueId = randomUUID();
const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;
const heartbeat = heartbeatService(db);
try {
await db.insert(companies).values({
id: companyId,
name: "Paperclip",
issuePrefix,
requireBoardApprovalForNewAgents: false,
});
await db.insert(agents).values([
{
id: primaryAgentId,
companyId,
name: "Primary Agent",
role: "engineer",
status: "idle",
adapterType: "openclaw_gateway",
adapterConfig: {
url: gateway.url,
headers: {
"x-openclaw-token": "gateway-token",
},
payloadTemplate: {
message: "wake now",
},
waitTimeoutMs: 2_000,
},
runtimeConfig: {},
permissions: {},
},
{
id: mentionedAgentId,
companyId,
name: "Mentioned Agent",
role: "engineer",
status: "idle",
adapterType: "openclaw_gateway",
adapterConfig: {
url: gateway.url,
headers: {
"x-openclaw-token": "gateway-token",
},
payloadTemplate: {
message: "wake now",
},
waitTimeoutMs: 2_000,
},
runtimeConfig: {},
permissions: {},
},
]);
await db.insert(issues).values({
id: issueId,
companyId,
title: "Prevent concurrent mention execution",
status: "todo",
priority: "high",
assigneeAgentId: primaryAgentId,
issueNumber: 1,
identifier: `${issuePrefix}-1`,
});
const primaryRun = await heartbeat.wakeup(primaryAgentId, {
source: "assignment",
triggerDetail: "system",
reason: "issue_assigned",
payload: { issueId },
contextSnapshot: {
issueId,
taskId: issueId,
wakeReason: "issue_assigned",
},
requestedByActorType: "system",
requestedByActorId: null,
});
expect(primaryRun).not.toBeNull();
await waitFor(() => gateway.getAgentPayloads().length === 1);
const mentionComment = await db
.insert(issueComments)
.values({
companyId,
issueId,
authorUserId: "user-1",
body: "@Mentioned Agent please inspect this after the current run.",
})
.returning()
.then((rows) => rows[0]);
const mentionRun = await heartbeat.wakeup(mentionedAgentId, {
source: "automation",
triggerDetail: "system",
reason: "issue_comment_mentioned",
payload: { issueId, commentId: mentionComment.id },
contextSnapshot: {
issueId,
taskId: issueId,
commentId: mentionComment.id,
wakeCommentId: mentionComment.id,
wakeReason: "issue_comment_mentioned",
source: "comment.mention",
},
requestedByActorType: "user",
requestedByActorId: "user-1",
});
expect(mentionRun).toBeNull();
await waitFor(async () => {
const deferred = await db
.select()
.from(agentWakeupRequests)
.where(
and(
eq(agentWakeupRequests.companyId, companyId),
eq(agentWakeupRequests.agentId, mentionedAgentId),
eq(agentWakeupRequests.status, "deferred_issue_execution"),
),
)
.then((rows) => rows[0] ?? null);
return Boolean(deferred);
});
expect(gateway.getAgentPayloads()).toHaveLength(1);
gateway.releaseFirstWait();
await waitFor(() => gateway.getAgentPayloads().length === 2, 90_000);
await waitFor(async () => {
const runs = await db
.select()
.from(heartbeatRuns)
.where(eq(heartbeatRuns.agentId, mentionedAgentId))
.orderBy(asc(heartbeatRuns.createdAt));
return runs.length === 1 && runs[0]?.status === "succeeded";
}, 90_000);
const mentionedRuns = await db
.select()
.from(heartbeatRuns)
.where(eq(heartbeatRuns.agentId, mentionedAgentId))
.orderBy(asc(heartbeatRuns.createdAt));
expect(mentionedRuns).toHaveLength(1);
expect(mentionedRuns[0]?.contextSnapshot).toMatchObject({
issueId,
wakeReason: "issue_comment_mentioned",
});
} finally {
gateway.releaseFirstWait();
await gateway.close();
}
}, 120_000);
it("treats the automatic run summary as fallback-only when the run already posted a comment", async () => {
const gateway = await createControlledGatewayServer();
const companyId = randomUUID();