[codex] harden heartbeat run summaries and recovery context (#3742)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Heartbeat runs are the control-plane record of what agents did, why
they woke up, and what operators should see next
> - Run lists, stranded issue comments, and live log polling all depend
on compact but accurate heartbeat summaries
> - The current branch had a focused backend slice that improves how run
result JSON is summarized, how stale process recovery comments are
written, and how live log polling resolves the active run
> - This pull request isolates that heartbeat/runtime reliability work
from the unrelated UI and dev-tooling changes
> - The benefit is more reliable issue context and cheaper run lookups
without dragging unrelated board UI changes into the same review
## What Changed
- Include the latest run failure in stranded issue comments during
orphaned process recovery.
- Bound heartbeat `result_json` payloads for list responses while
preserving the raw stored payloads.
- Narrow heartbeat log endpoint lookups so issue polling resolves the
relevant active run with less unnecessary scanning.
- Add focused tests for heartbeat list summaries, live run polling,
orphaned process recovery, and the run context/result summary helpers.
## Verification
- `pnpm vitest run
server/src/__tests__/heartbeat-context-summary.test.ts
server/src/__tests__/heartbeat-list.test.ts
server/src/__tests__/agent-live-run-routes.test.ts
server/src/__tests__/heartbeat-process-recovery.test.ts`
## Risks
- The main risk is accidentally hiding a field that some client still
expects from summarized `result_json`, or over-constraining the live log
lookup path for edge-case run routing.
- Recovery comments now surface the latest failure more aggressively, so
wording changes may affect downstream expectations if anyone parses
those comments too strictly.
## Model Used
- OpenAI Codex, GPT-5-based coding agent in the Codex CLI environment.
Exact backend model deployment ID was not exposed in-session.
Tool-assisted editing and shell execution were used.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-15 09:48:39 -05:00
|
|
|
/**
 * Default maximum string length (as measured by `String.prototype.length`)
 * that `truncateSummaryText` keeps when the caller does not pass an explicit
 * limit.
 */
export const HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS = 500;
|
|
|
|
|
/**
 * Character cap of 4096 for run result output.
 *
 * NOTE(review): not referenced in the visible part of this file — presumably
 * applied by callers that bound raw run output; confirm against usages.
 */
export const HEARTBEAT_RUN_RESULT_OUTPUT_MAX_CHARS = 4_096;
|
|
|
|
|
/**
 * Byte budget of 64 KiB (65,536 bytes).
 *
 * NOTE(review): not referenced in the visible part of this file — presumably
 * the size limit under which a `result_json` payload is considered safe to
 * return as-is; confirm against usages.
 */
export const HEARTBEAT_RUN_SAFE_RESULT_JSON_MAX_BYTES = 64 * 1024;
|
|
|
|
|
|
|
|
|
|
/**
 * Returns `value` cut down to at most `maxLength` UTF-16 code units.
 *
 * @param value - Candidate text; anything that is not a string yields `null`.
 * @param maxLength - Upper bound on the returned string's `length`. Negative
 *   limits are treated as `0`; fractional limits are truncated toward zero.
 * @returns The original string when it already fits, a shortened prefix when
 *   it does not, or `null` for non-string input.
 */
function truncateSummaryText(
  value: unknown,
  maxLength = HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS,
): string | null {
  if (typeof value !== "string") return null;
  // Written as a negated ">" so a NaN limit still returns the value untouched.
  if (!(value.length > maxLength)) return value;

  // A negative end index would make slice() count from the end of the string.
  let end = Math.max(0, Math.trunc(maxLength));

  // Never cut between the two halves of a surrogate pair: a lone high
  // surrogate is not valid text and renders as a replacement character.
  if (end > 0) {
    const before = value.charCodeAt(end - 1);
    const after = value.charCodeAt(end);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) end -= 1;
  }

  return value.slice(0, end);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Reads a numeric field from a loosely typed record.
 *
 * @param record - Object to read from.
 * @param key - Property name to look up.
 * @returns `undefined` when `key` is not present on `record`; the stored value
 *   when it is a finite number; `null` when the key is present but holds
 *   anything else (`null`, `undefined`, strings, objects, `NaN`, infinities).
 */
function readNumericField(
  record: Record<string, unknown>,
  key: string,
): number | null | undefined {
  if (!(key in record)) return undefined;

  const value = record[key];
  // Only real numbers may pass: forwarding arbitrary values would let large
  // strings or nested objects through under a "numeric" key.
  return typeof value === "number" && Number.isFinite(value) ? value : null;
}
|
|
|
|
|
|
2026-03-31 20:21:13 +01:00
|
|
|
/**
 * Normalizes a value into usable comment text.
 *
 * @param value - Candidate text of unknown type.
 * @returns The whitespace-trimmed string, or `null` when `value` is not a
 *   string or is empty after trimming.
 */
function readCommentText(value: unknown) {
  const text = typeof value === "string" ? value.trim() : "";
  return text === "" ? null : text;
}
|
|
|
|
|
|
2026-04-10 22:26:21 -05:00
|
|
|
/**
 * Combines a run's result JSON with a separately supplied summary string.
 *
 * The supplied summary is only a fallback: it is added when the result has no
 * non-blank `summary` of its own, and never overwrites one.
 *
 * @param resultJson - Result payload; ignored unless it is a non-array object.
 * @param summary - Fallback summary text; blank or non-string values are
 *   treated as absent.
 * @returns `null` when there is neither a usable payload nor a summary;
 *   `{ summary }` when only the summary is usable; the payload itself (same
 *   reference) when no merge is needed; otherwise a shallow copy of the
 *   payload with `summary` set.
 */
export function mergeHeartbeatRunResultJson(
  resultJson: Record<string, unknown> | null | undefined,
  summary: string | null | undefined,
): Record<string, unknown> | null {
  const fallbackSummary = readCommentText(summary);

  // No usable payload: the summary alone (if any) becomes the result.
  if (typeof resultJson !== "object" || resultJson === null || Array.isArray(resultJson)) {
    return fallbackSummary === null ? null : { summary: fallbackSummary };
  }

  // Nothing to add, or the payload already carries its own summary.
  if (fallbackSummary === null || readCommentText(resultJson.summary) !== null) {
    return resultJson;
  }

  return { ...resultJson, summary: fallbackSummary };
}
|
|
|
|
|
|
2026-03-11 17:23:33 -05:00
|
|
|
/**
 * Builds a compact summary of a heartbeat run's result JSON by copying only a
 * fixed set of known fields.
 *
 * Copied fields, in output order:
 * - `summary`, `result`, `message`, `error`: strings only, shortened by
 *   `truncateSummaryText`.
 * - `total_cost_usd`, `cost_usd`, `costUsd`: copied when `readNumericField`
 *   returns something other than `null`/`undefined`.
 * - `stopReason`, `timeoutSource`: trimmed non-blank strings, shortened by
 *   `truncateSummaryText` like the other text fields.
 * - `effectiveTimeoutSec`, `effectiveTimeoutMs`: same rule as the cost fields.
 * - `timeoutConfigured`, `timeoutFired`: booleans only.
 *
 * @param resultJson - Raw result payload; the input object is not modified.
 * @returns The summary object, or `null` when the input is not a non-array
 *   object or none of the known fields produced a value.
 */
export function summarizeHeartbeatRunResultJson(
  resultJson: Record<string, unknown> | null | undefined,
): Record<string, unknown> | null {
  if (!resultJson || typeof resultJson !== "object" || Array.isArray(resultJson)) {
    return null;
  }

  const summary: Record<string, unknown> = {};

  const textFields = ["summary", "result", "message", "error"] as const;
  for (const key of textFields) {
    const value = truncateSummaryText(resultJson[key]);
    if (value !== null) {
      summary[key] = value;
    }
  }

  const numericFieldAliases = ["total_cost_usd", "cost_usd", "costUsd"] as const;
  for (const key of numericFieldAliases) {
    const value = readNumericField(resultJson, key);
    if (value !== undefined && value !== null) {
      summary[key] = value;
    }
  }

  for (const key of ["stopReason", "timeoutSource"] as const) {
    // Trim first so blank values are dropped, then apply the same length cap
    // as the other text fields so an oversized value cannot pass through.
    const value = truncateSummaryText(readCommentText(resultJson[key]));
    if (value !== null) {
      summary[key] = value;
    }
  }

  for (const key of ["effectiveTimeoutSec", "effectiveTimeoutMs"] as const) {
    const value = readNumericField(resultJson, key);
    if (value !== undefined && value !== null) {
      summary[key] = value;
    }
  }

  for (const key of ["timeoutConfigured", "timeoutFired"] as const) {
    const value = resultJson[key];
    if (typeof value === "boolean") {
      summary[key] = value;
    }
  }

  return Object.keys(summary).length > 0 ? summary : null;
}
|
2026-03-31 20:21:13 +01:00
|
|
|
|
|
|
|
|
/**
 * Picks the text to use as an issue comment for a heartbeat run.
 *
 * @param resultJson - Raw result payload of the run.
 * @returns The first of `summary`, `result`, `message` that is a non-blank
 *   string (trimmed), or `null` when the payload is not a non-array object or
 *   none of those fields qualifies.
 */
export function buildHeartbeatRunIssueComment(
  resultJson: Record<string, unknown> | null | undefined,
): string | null {
  if (typeof resultJson !== "object" || resultJson === null || Array.isArray(resultJson)) {
    return null;
  }

  // Listed in priority order; the first usable field wins.
  for (const key of ["summary", "result", "message"] as const) {
    const text = readCommentText(resultJson[key]);
    if (text !== null) {
      return text;
    }
  }

  return null;
}
|