[codex] Recover productive terminal continuations (#4956)

## Thinking Path

> - Paperclip orchestrates AI agents through issue-scoped heartbeat runs
> - Recovery logic decides whether in-progress work still has a live path after a terminal run
> - A productive terminal continuation can still leave an issue stranded when no active run or wake remains
> - Treating that state as healthy leaves work stuck despite evidence that more action is needed
> - This pull request re-enqueues recovery for productive terminal continuations that left no live path
> - The benefit is fewer silently stranded in-progress issues after agents make partial progress

## What Changed

- Reclassified successful-but-productive terminal continuations as recoverable when no live path remains.
- Enqueue a follow-up recovery wake with the original run id and continuation metadata.
- Added regression tests covering productive terminal continuation recovery and advanced liveness handoff.

## Verification

- `pnpm exec vitest run server/src/__tests__/heartbeat-process-recovery.test.ts server/src/__tests__/run-continuations.test.ts`

## Risks

- Medium risk: recovery may schedule one more follow-up where Paperclip previously considered the work observed. The existing uniqueness, budget, and escalation checks still constrain retry loops.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, GPT-5 coding agent, tool use and local command execution. Exact context window was not exposed in the runtime.

## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
2026-06-16 02:40:39 +09:00 · 2026-05-01 11:57:23 -05:00 · 2026-05-01 11:57:23 -05:00 · 570a4206da
commit 570a4206da
parent 3cd26a78fc
3 changed files with 204 additions and 12 deletions
--- a/server/src/services/recovery/service.ts
+++ b/server/src/services/recovery/service.ts
@@ -74,6 +74,7 @@ type LatestIssueRun = Pick<
  typeof heartbeatRuns.$inferSelect,
  "id" | "agentId" | "status" | "error" | "errorCode" | "contextSnapshot" | "livenessState"
 > | null;
+type SuccessfulLatestIssueRun = NonNullable<LatestIssueRun> & { status: "succeeded" };

 type WatchdogDecisionActor =
  | { type: "board"; userId?: string | null; runId?: string | null }
@@ -188,7 +189,7 @@ function isUnsuccessfulTerminalIssueRun(latestRun: LatestIssueRun) {
  );
 }

-function isSuccessfulInProgressContinuationRun(latestRun: LatestIssueRun) {
+function isSuccessfulInProgressContinuationRun(latestRun: LatestIssueRun): latestRun is SuccessfulLatestIssueRun {
  return latestRun?.status === "succeeded";
 }

@@ -200,6 +201,13 @@ function isProductiveContinuationRun(latestRun: LatestIssueRun) {
      latestRun.livenessState === "needs_followup");
 }

+function isRepeatedProductiveContinuationRecovery(latestRun: SuccessfulLatestIssueRun) {
+  const latestContext = parseObject(latestRun.contextSnapshot);
+  return readNonEmptyString(latestContext.retryReason) === "issue_continuation_needed" &&
+    readNonEmptyString(latestContext.source) === "issue.productive_terminal_continuation_recovery" &&
+    isProductiveContinuationRun(latestRun);
+}
+
 function parseLivenessIncidentKey(incidentKey: string | null | undefined) {
  if (!incidentKey) return null;
  return parseIssueGraphLivenessIncidentKey(incidentKey);
@@ -1706,12 +1714,51 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
        continue;
      }
      if (isSuccessfulInProgressContinuationRun(latestRun)) {
-        if (isProductiveContinuationRun(latestRun)) {
-          result.productiveContinuationObserved += 1;
-        } else {
+        const successfulRun = latestRun;
+
+        if (!isProductiveContinuationRun(successfulRun)) {
          result.successfulContinuationObserved += 1;
+          result.skipped += 1;
+          continue;
+        }
+
+        if (isRepeatedProductiveContinuationRecovery(successfulRun)) {
+          const updated = await escalateStrandedAssignedIssue({
+            issue,
+            previousStatus: "in_progress",
+            latestRun: successfulRun,
+            comment:
+              "Paperclip automatically retried continuation for this assigned `in_progress` issue and the retry " +
+              "made progress, but it still has no live execution path. Moving it to `blocked` so it is visible for intervention.",
+          });
+          if (updated) {
+            result.escalated += 1;
+            result.issueIds.push(issue.id);
+          } else {
+            result.skipped += 1;
+          }
+          continue;
+        }
+
+        if (await isInvocationBudgetBlocked(issue, agentId)) {
+          result.skipped += 1;
+          continue;
+        }
+
+        const queued = await enqueueStrandedIssueRecovery({
+          issueId: issue.id,
+          agentId,
+          reason: "issue_continuation_needed",
+          retryReason: "issue_continuation_needed",
+          source: "issue.productive_terminal_continuation_recovery",
+          retryOfRunId: successfulRun.id,
+        });
+        if (queued) {
+          result.continuationRequeued += 1;
+          result.issueIds.push(issue.id);
+        } else {
+          result.skipped += 1;
        }
-        result.skipped += 1;
        continue;
      }
      if (didAutomaticRecoveryFail(latestRun, "issue_continuation_needed")) {