[codex] Harden recovery issue handling (#4600)

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - The control plane must recover stranded agent work without creating
new operational loops
> - Stranded recovery issues can themselves fail, and exposing raw retry
errors in comments can leak sensitive adapter details
> - New local companies also should not force a hire-approval gate
unless operators enable that policy
> - This pull request hardens recovery issue handling, redacts retry
failure details in issue copy, preserves `maxConcurrentRuns: 1`, and
flips new-hire approval to an opt-in default
> - The benefit is safer automatic recovery and smoother default company
setup without hidden migration conflicts

## What Changed

- Added migration `0071_default_hire_approval_off` and updated company
schema/import/export/docs so hire approvals default off and serialize
only when enabled.
- Added migration `0072_large_sandman` with a partial unique index
preventing duplicate active stranded recovery issues for the same source
issue.
- Blocked failed `stranded_issue_recovery` issues in place instead of
creating nested recovery issues.
- Redacted latest retry failure details from recovery issue comments
while still linking reviewers to run evidence.
- Allowed `maxConcurrentRuns: 1` to be honored by heartbeat concurrency
normalization.
- Added focused regression coverage for recovery recursion, redaction,
migration ordering, and concurrency behavior.

## Verification

- `pnpm --filter @paperclipai/db run check:migrations`
- `pnpm exec vitest run --project @paperclipai/server
server/src/__tests__/recovery-classifiers.test.ts`
- `pnpm exec vitest run --project @paperclipai/server
server/src/__tests__/company-portability.test.ts --pool=forks
--poolOptions.forks.isolate=true`
- `pnpm exec vitest run --project @paperclipai/server
server/src/__tests__/agent-permissions-routes.test.ts --pool=forks
--poolOptions.forks.isolate=true`
- `pnpm --filter @paperclipai/server typecheck`
- `pnpm exec vitest run --project @paperclipai/server
server/src/__tests__/heartbeat-process-recovery.test.ts --pool=forks
--poolOptions.forks.isolate=true` exits 0, but this host skipped the
embedded Postgres tests with the existing init guard.
- `pnpm exec vitest run --project @paperclipai/server
server/src/__tests__/heartbeat-dependency-scheduling.test.ts
--pool=forks --poolOptions.forks.isolate=true` exits 0, but this host
skipped the embedded Postgres tests with the existing init guard.

## Risks

- Migration risk is low but this PR intentionally owns both new
migrations to avoid separate PR migration-journal conflicts.
- Recovery comments now require operators to inspect linked run evidence
for details instead of reading raw errors inline.
- The hire approval default changes behavior for newly created/imported
companies only; existing persisted company settings are not changed
except by the SQL default for future rows.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, GPT-5 coding agent, tool-enabled terminal/GitHub
workflow, reasoning mode active. Context window not exposed in this
environment.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Dotta 2026-04-27 15:02:47 -05:00 committed by GitHub
parent 6ccf80bcf2
commit 7a9b3a6037
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 16535 additions and 65 deletions

View file

@ -19,7 +19,6 @@ const baseAgent = {
adapterType: "process",
adapterConfig: {},
runtimeConfig: {},
defaultEnvironmentId: null,
budgetMonthlyCents: 0,
spentMonthlyCents: 0,
pauseReason: null,
@ -352,7 +351,6 @@ describe.sequential("agent permission routes", () => {
mockCompanySkillService.listRuntimeSkillEntries.mockResolvedValue([]);
mockCompanySkillService.resolveRequestedSkillKeys.mockImplementation(async (_companyId, requested) => requested);
mockBudgetService.upsertPolicy.mockResolvedValue(undefined);
mockEnvironmentService.getById.mockResolvedValue(null);
mockAgentInstructionsService.materializeManagedBundle.mockImplementation(
async (agent: Record<string, unknown>, files: Record<string, string>) => ({
bundle: null,
@ -375,9 +373,6 @@ describe.sequential("agent permission routes", () => {
mockInstanceSettingsService.getGeneral.mockResolvedValue({
censorUsernameInLogs: false,
});
mockEnsureOpenCodeModelConfiguredAndAvailable.mockResolvedValue([
{ id: "opencode/gpt-5-nano", label: "opencode/gpt-5-nano" },
]);
mockLogActivity.mockResolvedValue(undefined);
});

View file

@ -139,12 +139,12 @@ describe("company portability", () => {
brandColor: "#5c5fff",
logoAssetId: null,
logoUrl: null,
requireBoardApprovalForNewAgents: true,
requireBoardApprovalForNewAgents: false,
});
companySvc.create.mockResolvedValue({
id: "company-imported",
name: "Imported Paperclip",
requireBoardApprovalForNewAgents: true,
requireBoardApprovalForNewAgents: false,
});
agentSvc.list.mockResolvedValue([
{
@ -461,6 +461,32 @@ describe("company portability", () => {
expect(exported.warnings).toContain("Agent claudecoder PATH override was omitted from export because it is system-dependent.");
});
it("exports hire approval policy only when approval is required", async () => {
const portability = companyPortabilityService({} as any);
companySvc.getById.mockResolvedValueOnce({
id: "company-1",
name: "Paperclip",
description: null,
issuePrefix: "PAP",
brandColor: "#5c5fff",
logoAssetId: null,
logoUrl: null,
requireBoardApprovalForNewAgents: true,
});
const exported = await portability.exportBundle("company-1", {
include: {
company: true,
agents: false,
projects: false,
issues: false,
},
});
expect(asTextFile(exported.files[".paperclip.yaml"])).toContain("requireBoardApprovalForNewAgents: true");
});
it("exports default sidebar order into the Paperclip extension and manifest", async () => {
const portability = companyPortabilityService({} as any);
@ -2554,7 +2580,7 @@ describe("company portability", () => {
status: "idle",
}));
expect(companySvc.create).toHaveBeenCalledWith(expect.objectContaining({
requireBoardApprovalForNewAgents: true,
requireBoardApprovalForNewAgents: false,
}));
});

View file

@ -96,7 +96,16 @@ describeEmbeddedPostgres("heartbeat dependency-aware queued run selection", () =
}, 20_000);
afterEach(async () => {
vi.clearAllMocks();
mockAdapterExecute.mockReset();
mockAdapterExecute.mockImplementation(async () => ({
exitCode: 0,
signal: null,
timedOut: false,
errorMessage: null,
summary: "Dependency-aware heartbeat test run.",
provider: "test",
model: "test-model",
}));
runningProcesses.clear();
let idlePolls = 0;
for (let attempt = 0; attempt < 100; attempt += 1) {
@ -347,6 +356,126 @@ describeEmbeddedPostgres("heartbeat dependency-aware queued run selection", () =
expect(blockedWakeRequestCount).toBeGreaterThanOrEqual(2);
});
it("honors maxConcurrentRuns 1 by leaving a second assignment wake queued", async () => {
const companyId = randomUUID();
const agentId = randomUUID();
const firstIssueId = randomUUID();
const secondIssueId = randomUUID();
let finishFirstRun!: () => void;
const firstRunFinished = new Promise<void>((resolve) => {
finishFirstRun = resolve;
});
mockAdapterExecute.mockImplementationOnce(async () => {
await firstRunFinished;
return {
exitCode: 0,
signal: null,
timedOut: false,
errorMessage: null,
summary: "First assignment run completed.",
provider: "test",
model: "test-model",
};
});
await db.insert(companies).values({
id: companyId,
name: "Paperclip",
issuePrefix: `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`,
requireBoardApprovalForNewAgents: false,
});
await db.insert(agents).values({
id: agentId,
companyId,
name: "CodexCoder",
role: "engineer",
status: "active",
adapterType: "codex_local",
adapterConfig: {},
runtimeConfig: {
heartbeat: {
wakeOnDemand: true,
maxConcurrentRuns: 1,
},
},
permissions: {},
});
await db.insert(issues).values([
{
id: firstIssueId,
companyId,
title: "First assignment",
status: "todo",
priority: "high",
assigneeAgentId: agentId,
},
{
id: secondIssueId,
companyId,
title: "Second assignment",
status: "todo",
priority: "high",
assigneeAgentId: agentId,
},
]);
try {
const firstWake = await heartbeat.wakeup(agentId, {
source: "assignment",
triggerDetail: "system",
reason: "issue_assigned",
payload: { issueId: firstIssueId },
contextSnapshot: { issueId: firstIssueId, wakeReason: "issue_assigned" },
});
expect(firstWake).not.toBeNull();
const firstRunStarted = await waitForCondition(async () => {
const run = await db
.select({ status: heartbeatRuns.status })
.from(heartbeatRuns)
.where(eq(heartbeatRuns.id, firstWake!.id))
.then((rows) => rows[0] ?? null);
return run?.status === "running";
});
expect(firstRunStarted).toBe(true);
const firstAdapterStarted = await waitForCondition(async () => mockAdapterExecute.mock.calls.length === 1);
expect(firstAdapterStarted).toBe(true);
const secondWake = await heartbeat.wakeup(agentId, {
source: "assignment",
triggerDetail: "system",
reason: "issue_assigned",
payload: { issueId: secondIssueId },
contextSnapshot: { issueId: secondIssueId, wakeReason: "issue_assigned" },
});
expect(secondWake).not.toBeNull();
const secondRunWhileFirstRunning = await db
.select({ status: heartbeatRuns.status })
.from(heartbeatRuns)
.where(eq(heartbeatRuns.id, secondWake!.id))
.then((rows) => rows[0] ?? null);
expect(secondRunWhileFirstRunning?.status).toBe("queued");
expect(mockAdapterExecute).toHaveBeenCalledTimes(1);
finishFirstRun();
const secondRunSucceeded = await waitForCondition(async () => {
const run = await db
.select({ status: heartbeatRuns.status })
.from(heartbeatRuns)
.where(eq(heartbeatRuns.id, secondWake!.id))
.then((rows) => rows[0] ?? null);
return run?.status === "succeeded";
});
expect(secondRunSucceeded).toBe(true);
expect(mockAdapterExecute).toHaveBeenCalledTimes(2);
} finally {
finishFirstRun();
}
});
it("cancels stale queued runs when issue blockers are still unresolved", async () => {
const companyId = randomUUID();
const agentId = randomUUID();

View file

@ -468,6 +468,8 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
retryReason?: "assignment_recovery" | "issue_continuation_needed" | null;
assignToUser?: boolean;
activePauseHold?: boolean;
runErrorCode?: string | null;
runError?: string | null;
}) {
const companyId = randomUUID();
const agentId = randomUUID();
@ -509,7 +511,9 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
runId,
claimedAt: now,
finishedAt: new Date("2026-03-19T00:05:00.000Z"),
error: input.runStatus === "succeeded" ? null : "run failed before issue advanced",
error: input.runStatus === "succeeded"
? null
: ("runError" in input ? input.runError : "run failed before issue advanced"),
});
await db.insert(heartbeatRuns).values({
@ -531,8 +535,12 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
startedAt: now,
finishedAt: new Date("2026-03-19T00:05:00.000Z"),
updatedAt: new Date("2026-03-19T00:05:00.000Z"),
errorCode: input.runStatus === "succeeded" ? null : "process_lost",
error: input.runStatus === "succeeded" ? null : "run failed before issue advanced",
errorCode: input.runStatus === "succeeded"
? null
: ("runErrorCode" in input ? input.runErrorCode : "process_lost"),
error: input.runStatus === "succeeded"
? null
: ("runError" in input ? input.runError : "run failed before issue advanced"),
});
await db.insert(issues).values([
@ -659,6 +667,20 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
return recovery;
}
async function sourceBlockerIssueIds(companyId: string, sourceIssueId: string) {
return db
.select({ blockerIssueId: issueRelations.issueId })
.from(issueRelations)
.where(
and(
eq(issueRelations.companyId, companyId),
eq(issueRelations.relatedIssueId, sourceIssueId),
eq(issueRelations.type, "blocks"),
),
)
.then((rows) => rows.map((row) => row.blockerIssueId));
}
async function seedQueuedIssueRunFixture() {
const companyId = randomUUID();
const agentId = randomUUID();
@ -930,6 +952,81 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
expect(comments[0]?.body).toContain(`Recovery issue: [${recovery.identifier}]`);
});
it("blocks failed recovery work in place during immediate terminal-run cleanup", async () => {
const sourceIssueId = randomUUID();
const { companyId, agentId, runId, issueId } = await seedRunFixture({
agentStatus: "idle",
processPid: 999_999_999,
processLossRetryCount: 1,
runErrorCode: "process_lost",
runError: "Authorization: Bearer sk-test-recovery-secret",
});
await db
.update(issues)
.set({
title: "Recover stalled issue PAP-1",
originKind: "stranded_issue_recovery",
originId: sourceIssueId,
})
.where(eq(issues.id, issueId));
const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;
await db.insert(issues).values({
id: sourceIssueId,
companyId,
title: "Original stranded source",
status: "blocked",
priority: "medium",
issueNumber: 2,
identifier: `${issuePrefix}-2`,
});
await db.insert(issueRelations).values({
companyId,
issueId,
relatedIssueId: sourceIssueId,
type: "blocks",
});
const heartbeat = heartbeatService(db);
const result = await heartbeat.reapOrphanedRuns();
expect(result.reaped).toBe(1);
expect(result.runIds).toEqual([runId]);
const runs = await db
.select()
.from(heartbeatRuns)
.where(eq(heartbeatRuns.agentId, agentId));
expect(runs).toHaveLength(1);
expect(runs[0]?.status).toBe("failed");
const recoveryIssue = await waitForValue(async () =>
db.select().from(issues).where(eq(issues.id, issueId)).then((rows) => {
const issue = rows[0] ?? null;
return issue?.status === "blocked" ? issue : null;
})
);
expect(recoveryIssue?.assigneeAgentId).toBe(agentId);
expect(recoveryIssue?.originKind).toBe("stranded_issue_recovery");
expect(recoveryIssue?.originId).toBe(sourceIssueId);
expect(recoveryIssue?.executionRunId).toBeNull();
const nestedRecoveries = await db
.select()
.from(issues)
.where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stranded_issue_recovery"), eq(issues.originId, issueId)));
expect(nestedRecoveries).toHaveLength(0);
const comments = await waitForValue(async () => {
const rows = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId));
return rows.length > 0 ? rows : null;
});
expect(comments).toHaveLength(1);
expect(comments[0]?.body).toContain("stopped automatic stranded-work recovery");
expect(comments[0]?.body).toContain("recovery issues do not create nested `stranded_issue_recovery` issues");
expect(comments[0]?.body).toContain("Latest retry failure details were withheld from the issue thread");
expect(comments[0]?.body).not.toContain("sk-test-recovery-secret");
await expect(sourceBlockerIssueIds(companyId, sourceIssueId)).resolves.toEqual([issueId]);
});
it("does not block paused-tree work when immediate continuation recovery is suppressed by the hold", async () => {
const { companyId, agentId, runId, issueId } = await seedRunFixture({
agentStatus: "idle",
@ -1108,6 +1205,8 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
status: "todo",
runStatus: "failed",
retryReason: "assignment_recovery",
runErrorCode: "process_lost",
runError: "Authorization: Bearer sk-test-recovery-secret",
});
const heartbeat = heartbeatService(db);
@ -1127,11 +1226,12 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
previousStatus: "todo",
retryReason: "assignment_recovery",
});
expect(recovery.description ?? "").not.toContain("sk-test-recovery-secret");
const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId));
expect(comments).toHaveLength(1);
expect(comments[0]?.body).toContain("retried dispatch");
expect(comments[0]?.body).toContain("Latest retry failure: `process_lost` - run failed before issue advanced.");
expect(comments[0]?.body).toContain("Latest retry failure details were withheld from the issue thread");
expect(comments[0]?.body).toContain(`Recovery issue: [${recovery.identifier}]`);
});
@ -1446,10 +1546,217 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId));
expect(comments).toHaveLength(1);
expect(comments[0]?.body).toContain("retried continuation");
expect(comments[0]?.body).toContain("Latest retry failure: `process_lost` - run failed before issue advanced.");
expect(comments[0]?.body).toContain("Latest retry failure details were withheld from the issue thread");
expect(comments[0]?.body).toContain(`Recovery issue: [${recovery.identifier}]`);
});
it("redacts error-code-only stranded recovery failures in issue copy", async () => {
const { companyId, agentId, issueId, runId } = await seedStrandedIssueFixture({
status: "in_progress",
runStatus: "failed",
retryReason: "issue_continuation_needed",
runErrorCode: "adapter_exit_code",
runError: null,
});
const heartbeat = heartbeatService(db);
const result = await heartbeat.reconcileStrandedAssignedIssues();
expect(result.escalated).toBe(1);
const recovery = await expectStrandedRecoveryArtifacts({
companyId,
agentId,
issueId,
runId,
previousStatus: "in_progress",
retryReason: "issue_continuation_needed",
});
expect(recovery.description).toContain("Latest retry failure details were withheld from the issue thread");
expect(recovery.description).not.toContain("- Failure: none recorded");
const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId));
expect(comments).toHaveLength(1);
expect(comments[0]?.body).toContain("Latest retry failure details were withheld from the issue thread");
expect(comments[0]?.body).not.toContain("- Failure: none recorded");
});
it("reuses the raced stranded recovery issue when duplicate active recovery creation conflicts", async () => {
const { companyId, issueId } = await seedStrandedIssueFixture({
status: "in_progress",
runStatus: "failed",
retryReason: "issue_continuation_needed",
});
const heartbeat = heartbeatService(db);
const results = await Promise.allSettled(
Array.from({ length: 8 }, () => heartbeat.reconcileStrandedAssignedIssues()),
);
expect(results.every((result) => result.status === "fulfilled")).toBe(true);
const recoveries = await db
.select()
.from(issues)
.where(and(
eq(issues.companyId, companyId),
eq(issues.originKind, "stranded_issue_recovery"),
eq(issues.originId, issueId),
));
expect(recoveries).toHaveLength(1);
await expect(sourceBlockerIssueIds(companyId, issueId)).resolves.toEqual([recoveries[0]?.id]);
});
it("blocks stranded recovery issues in place instead of creating nested recovery issues", async () => {
const sourceIssueId = randomUUID();
const { companyId, agentId, issueId, runId } = await seedStrandedIssueFixture({
status: "in_progress",
runStatus: "failed",
});
await db
.update(issues)
.set({
title: "Recover stalled issue PAP-1",
originKind: "stranded_issue_recovery",
originId: sourceIssueId,
})
.where(eq(issues.id, issueId));
const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;
await db.insert(issues).values({
id: sourceIssueId,
companyId,
title: "Original stranded source",
status: "blocked",
priority: "medium",
issueNumber: 2,
identifier: `${issuePrefix}-2`,
});
await db.insert(issueRelations).values({
companyId,
issueId,
relatedIssueId: sourceIssueId,
type: "blocks",
});
const heartbeat = heartbeatService(db);
const result = await heartbeat.reconcileStrandedAssignedIssues();
expect(result.dispatchRequeued).toBe(0);
expect(result.continuationRequeued).toBe(0);
expect(result.escalated).toBe(1);
expect(result.issueIds).toEqual([issueId]);
const recoveryIssue = await db.select().from(issues).where(eq(issues.id, issueId)).then((rows) => rows[0] ?? null);
expect(recoveryIssue?.status).toBe("blocked");
expect(recoveryIssue?.assigneeAgentId).toBe(agentId);
expect(recoveryIssue?.originKind).toBe("stranded_issue_recovery");
expect(recoveryIssue?.originId).toBe(sourceIssueId);
const nestedRecoveries = await db
.select()
.from(issues)
.where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stranded_issue_recovery"), eq(issues.originId, issueId)));
expect(nestedRecoveries).toHaveLength(0);
const runs = await db.select().from(heartbeatRuns).where(eq(heartbeatRuns.agentId, agentId));
expect(runs).toHaveLength(1);
expect(runs[0]?.id).toBe(runId);
const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId));
expect(comments).toHaveLength(1);
expect(comments[0]?.body).toContain("stopped automatic stranded-work recovery");
expect(comments[0]?.body).toContain("Latest retry failure details were withheld from the issue thread");
expect(comments[0]?.body).toContain("recovery issues do not create nested `stranded_issue_recovery` issues");
await expect(sourceBlockerIssueIds(companyId, sourceIssueId)).resolves.toEqual([issueId]);
});
it("keeps repeated recovery failures on the same canonical recovery issue", async () => {
const sourceIssueId = randomUUID();
const { companyId, agentId, issueId, runId } = await seedStrandedIssueFixture({
status: "in_progress",
runStatus: "failed",
});
const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;
await db.insert(issues).values({
id: sourceIssueId,
companyId,
title: "Original stranded source",
status: "blocked",
priority: "medium",
issueNumber: 2,
identifier: `${issuePrefix}-2`,
});
await db
.update(issues)
.set({
title: "Recover stalled issue PAP-1",
originKind: "stranded_issue_recovery",
originId: sourceIssueId,
})
.where(eq(issues.id, issueId));
await db.insert(issueRelations).values({
companyId,
issueId,
relatedIssueId: sourceIssueId,
type: "blocks",
});
const heartbeat = heartbeatService(db);
const firstResult = await heartbeat.reconcileStrandedAssignedIssues();
expect(firstResult.escalated).toBe(1);
expect(firstResult.issueIds).toEqual([issueId]);
const secondRunId = randomUUID();
await db.insert(heartbeatRuns).values({
id: secondRunId,
companyId,
agentId,
invocationSource: "assignment",
triggerDetail: "system",
status: "failed",
contextSnapshot: {
issueId,
taskId: issueId,
wakeReason: "issue_assigned",
source: "stranded_issue_recovery",
},
startedAt: new Date("2030-03-19T00:10:00.000Z"),
finishedAt: new Date("2030-03-19T00:15:00.000Z"),
createdAt: new Date("2030-03-19T00:10:00.000Z"),
updatedAt: new Date("2030-03-19T00:15:00.000Z"),
errorCode: "adapter_failed",
error: "adapter failed while retrying recovery issue",
});
await db
.update(issues)
.set({
status: "in_progress",
checkoutRunId: secondRunId,
executionRunId: null,
})
.where(eq(issues.id, issueId));
const secondResult = await heartbeat.reconcileStrandedAssignedIssues();
expect(secondResult.dispatchRequeued).toBe(0);
expect(secondResult.continuationRequeued).toBe(0);
expect(secondResult.escalated).toBe(1);
expect(secondResult.issueIds).toEqual([issueId]);
const recoveryIssuesForSource = await db
.select()
.from(issues)
.where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stranded_issue_recovery"), eq(issues.originId, sourceIssueId)));
expect(recoveryIssuesForSource.map((issue) => issue.id)).toEqual([issueId]);
const nestedRecoveries = await db
.select()
.from(issues)
.where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stranded_issue_recovery"), eq(issues.originId, issueId)));
expect(nestedRecoveries).toHaveLength(0);
await expect(sourceBlockerIssueIds(companyId, sourceIssueId)).resolves.toEqual([issueId]);
const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId));
expect(comments).toHaveLength(2);
expect(comments[1]?.body).toContain("Latest retry failure details were withheld from the issue thread");
});
it("does not escalate paused-tree recovery when the automatic continuation retry was cancelled by the hold", async () => {
const { companyId, agentId, issueId } = await seedStrandedIssueFixture({
status: "in_progress",

View file

@ -10,6 +10,7 @@ import {
buildRunLivenessContinuationIdempotencyKey,
classifyIssueGraphLiveness,
decideRunLivenessContinuation,
isStrandedIssueRecoveryOriginKind,
parseIssueGraphLivenessIncidentKey,
} from "../services/recovery/index.ts";
@ -143,4 +144,11 @@ describe("recovery classifier boundary", () => {
nextAttempt: 1,
})).toBe("run_liveness_continuation:issue-1:run-1:plan_only:1");
});
it("classifies stranded recovery origins as recovery-owned work", () => {
expect(isStrandedIssueRecoveryOriginKind("stranded_issue_recovery")).toBe(true);
expect(isStrandedIssueRecoveryOriginKind("harness_liveness_escalation")).toBe(false);
expect(isStrandedIssueRecoveryOriginKind("manual")).toBe(false);
expect(isStrandedIssueRecoveryOriginKind(null)).toBe(false);
});
});

View file

@ -2264,7 +2264,7 @@ function buildEnvInputMap(inputs: CompanyPortabilityEnvInput[]) {
}
function readCompanyApprovalDefault(_frontmatter: Record<string, unknown>) {
return true;
return false;
}
function readIncludeEntries(frontmatter: Record<string, unknown>): CompanyPackageIncludeEntry[] {
@ -3465,7 +3465,7 @@ export function companyPortabilityService(db: Db, storage?: StorageService) {
company: stripEmptyValues({
brandColor: company.brandColor ?? null,
logoPath: companyLogoPath,
requireBoardApprovalForNewAgents: company.requireBoardApprovalForNewAgents ? undefined : false,
requireBoardApprovalForNewAgents: company.requireBoardApprovalForNewAgents ? true : undefined,
feedbackDataSharingEnabled: company.feedbackDataSharingEnabled ? true : undefined,
feedbackDataSharingConsentAt: company.feedbackDataSharingConsentAt?.toISOString() ?? null,
feedbackDataSharingConsentByUserId: company.feedbackDataSharingConsentByUserId ?? null,
@ -3986,8 +3986,8 @@ export function companyPortabilityService(db: Db, storage?: StorageService) {
description: include.company ? (sourceManifest.company?.description ?? null) : null,
brandColor: include.company ? (sourceManifest.company?.brandColor ?? null) : null,
requireBoardApprovalForNewAgents: include.company
? (sourceManifest.company?.requireBoardApprovalForNewAgents ?? true)
: true,
? (sourceManifest.company?.requireBoardApprovalForNewAgents ?? false)
: false,
feedbackDataSharingEnabled: include.company
? (sourceManifest.company?.feedbackDataSharingEnabled ?? false)
: false,

View file

@ -101,6 +101,7 @@ import {
} from "./execution-workspace-policy.js";
import { instanceSettingsService } from "./instance-settings.js";
import {
RECOVERY_ORIGIN_KINDS,
RUN_LIVENESS_CONTINUATION_REASON,
buildRunLivenessContinuationIdempotencyKey,
decideRunLivenessContinuation,
@ -133,6 +134,7 @@ const MAX_RUN_EVENT_PAYLOAD_ARRAY_ITEMS = 50;
const MAX_RUN_EVENT_PAYLOAD_OBJECT_KEYS = 100;
const MAX_RUN_EVENT_PAYLOAD_DEPTH = 6;
const HEARTBEAT_MAX_CONCURRENT_RUNS_DEFAULT = AGENT_DEFAULT_MAX_CONCURRENT_RUNS;
const HEARTBEAT_MAX_CONCURRENT_RUNS_MIN = 1;
const HEARTBEAT_MAX_CONCURRENT_RUNS_MAX = 10;
const LIVENESS_BOOKKEEPING_ACTIVITY_ACTIONS = [
"environment.lease_acquired",
@ -848,7 +850,7 @@ export function compactRunLogChunk(chunk: string, maxChars = MAX_PERSISTED_LOG_C
function normalizeMaxConcurrentRuns(value: unknown) {
const parsed = Math.floor(asNumber(value, HEARTBEAT_MAX_CONCURRENT_RUNS_DEFAULT));
if (!Number.isFinite(parsed)) return HEARTBEAT_MAX_CONCURRENT_RUNS_DEFAULT;
return Math.max(HEARTBEAT_MAX_CONCURRENT_RUNS_DEFAULT, Math.min(HEARTBEAT_MAX_CONCURRENT_RUNS_MAX, parsed));
return Math.max(HEARTBEAT_MAX_CONCURRENT_RUNS_MIN, Math.min(HEARTBEAT_MAX_CONCURRENT_RUNS_MAX, parsed));
}
interface WakeupOptions {
@ -6193,6 +6195,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
}
const shouldBlockImmediately =
issue.originKind === RECOVERY_ORIGIN_KINDS.strandedIssueRecovery ||
!recoveryAgentInvokable ||
!recoveryAgent ||
didAutomaticRecoveryFail(run, issue.status === "todo" ? "assignment_recovery" : "issue_continuation_needed");

View file

@ -4,6 +4,7 @@ export {
RECOVERY_REASON_KINDS,
buildIssueGraphLivenessIncidentKey,
buildIssueGraphLivenessLeafKey,
isStrandedIssueRecoveryOriginKind,
parseIssueGraphLivenessIncidentKey,
} from "./origins.js";
export type {

View file

@ -17,6 +17,10 @@ export type RecoveryOriginKind = typeof RECOVERY_ORIGIN_KINDS[keyof typeof RECOV
export type RecoveryReasonKind = typeof RECOVERY_REASON_KINDS[keyof typeof RECOVERY_REASON_KINDS];
export type RecoveryKeyPrefix = typeof RECOVERY_KEY_PREFIXES[keyof typeof RECOVERY_KEY_PREFIXES];
export function isStrandedIssueRecoveryOriginKind(originKind: string | null | undefined) {
return originKind === RECOVERY_ORIGIN_KINDS.strandedIssueRecovery;
}
export function buildIssueGraphLivenessIncidentKey(input: {
companyId: string;
issueId: string;

View file

@ -35,6 +35,7 @@ import { getRunLogStore } from "../run-log-store.js";
import {
RECOVERY_ORIGIN_KINDS,
buildIssueGraphLivenessLeafKey,
isStrandedIssueRecoveryOriginKind,
parseIssueGraphLivenessIncidentKey,
} from "./origins.js";
import {
@ -101,22 +102,9 @@ function readNonEmptyString(value: unknown): string | null {
function summarizeRunFailureForIssueComment(run: LatestIssueRun) {
if (!run) return null;
const errorCode = readNonEmptyString(run.errorCode)?.trim() ?? null;
const rawError = readNonEmptyString(run.error)?.trim() ?? null;
const apiMessageMatch = rawError?.match(/"message"\s*:\s*"([^"]+)"/);
const firstLine = rawError
?.split(/\r?\n/)
.map((line) => line.trim())
.find(Boolean) ?? null;
const summarySource = apiMessageMatch?.[1] ?? firstLine;
const summary =
summarySource && summarySource.length > 240
? `${summarySource.slice(0, 237)}...`
: summarySource;
if (errorCode && summary) return ` Latest retry failure: \`${errorCode}\` - ${summary}.`;
if (errorCode) return ` Latest retry failure: \`${errorCode}\`.`;
if (summary) return ` Latest retry failure: ${summary}.`;
if (readNonEmptyString(run.error) || readNonEmptyString(run.errorCode)) {
return " Latest retry failure details were withheld from the issue thread; inspect the linked run for evidence.";
}
return null;
}
@ -187,6 +175,19 @@ function isAgentInvokable(agent: typeof agents.$inferSelect | null | undefined)
return Boolean(agent && !["paused", "terminated", "pending_approval"].includes(agent.status));
}
function isStrandedIssueRecoveryIssue(issue: Pick<typeof issues.$inferSelect, "originKind">) {
return isStrandedIssueRecoveryOriginKind(issue.originKind);
}
function isUnsuccessfulTerminalIssueRun(latestRun: LatestIssueRun) {
return Boolean(
latestRun &&
UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES.includes(
latestRun.status as (typeof UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES)[number],
),
);
}
function parseLivenessIncidentKey(incidentKey: string | null | undefined) {
if (!incidentKey) return null;
return parseIssueGraphLivenessIncidentKey(incidentKey);
@ -813,6 +814,16 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
);
}
function isUniqueStrandedIssueRecoveryConflict(error: unknown) {
if (!error || typeof error !== "object") return false;
const maybe = error as { code?: string; constraint?: string; message?: string };
return maybe.code === "23505" &&
(
maybe.constraint === "issues_active_stranded_issue_recovery_uq" ||
typeof maybe.message === "string" && maybe.message.includes("issues_active_stranded_issue_recovery_uq")
);
}
async function ensureSourceIssueBlockedByStaleEvaluation(input: {
sourceIssue: typeof issues.$inferSelect | null;
evaluationIssue: { id: string; identifier: string | null };
@ -1257,6 +1268,8 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
latestRun: LatestIssueRun;
previousStatus: "todo" | "in_progress";
}) {
if (isStrandedIssueRecoveryIssue(input.issue)) return null;
const existing = await findOpenStrandedIssueRecoveryIssue(input.issue.companyId, input.issue.id);
if (existing) return existing;
@ -1264,32 +1277,40 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
if (!ownerAgentId) return null;
const prefix = await getCompanyIssuePrefix(input.issue.companyId);
const recovery = await issuesSvc.create(input.issue.companyId, {
title: `Recover stalled issue ${input.issue.identifier ?? input.issue.title}`,
description: buildStrandedIssueRecoveryDescription({
issue: input.issue,
latestRun: input.latestRun,
previousStatus: input.previousStatus,
prefix,
}),
status: "todo",
priority: input.issue.priority,
parentId: input.issue.id,
projectId: input.issue.projectId,
goalId: input.issue.goalId,
assigneeAgentId: ownerAgentId,
originKind: STRANDED_ISSUE_RECOVERY_ORIGIN_KIND,
originId: input.issue.id,
originRunId: input.latestRun?.id ?? null,
originFingerprint: [
STRANDED_ISSUE_RECOVERY_ORIGIN_KIND,
input.issue.companyId,
input.issue.id,
input.latestRun?.id ?? "no-run",
].join(":"),
billingCode: input.issue.billingCode,
inheritExecutionWorkspaceFromIssueId: input.issue.id,
});
let recovery: Awaited<ReturnType<typeof issuesSvc.create>>;
try {
recovery = await issuesSvc.create(input.issue.companyId, {
title: `Recover stalled issue ${input.issue.identifier ?? input.issue.title}`,
description: buildStrandedIssueRecoveryDescription({
issue: input.issue,
latestRun: input.latestRun,
previousStatus: input.previousStatus,
prefix,
}),
status: "todo",
priority: input.issue.priority,
parentId: input.issue.id,
projectId: input.issue.projectId,
goalId: input.issue.goalId,
assigneeAgentId: ownerAgentId,
originKind: STRANDED_ISSUE_RECOVERY_ORIGIN_KIND,
originId: input.issue.id,
originRunId: input.latestRun?.id ?? null,
originFingerprint: [
STRANDED_ISSUE_RECOVERY_ORIGIN_KIND,
input.issue.companyId,
input.issue.id,
input.latestRun?.id ?? "no-run",
].join(":"),
billingCode: input.issue.billingCode,
inheritExecutionWorkspaceFromIssueId: input.issue.id,
});
} catch (error) {
if (!isUniqueStrandedIssueRecoveryConflict(error)) throw error;
const raced = await findOpenStrandedIssueRecoveryIssue(input.issue.companyId, input.issue.id);
if (!raced) throw error;
return raced;
}
await deps.enqueueWakeup(ownerAgentId, {
source: "assignment",
@ -1315,6 +1336,78 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
return recovery;
}
function buildRecoveryIssueInPlaceEscalationComment(input: {
issue: typeof issues.$inferSelect;
previousStatus: "todo" | "in_progress";
latestRun: LatestIssueRun;
prefix: string;
}) {
const runLink = input.latestRun
? runUiLink({ id: input.latestRun.id, agentId: input.latestRun.agentId }, input.prefix)
: "none";
const retryReason = readNonEmptyString(parseObject(input.latestRun?.contextSnapshot)?.retryReason) ?? "none";
const failureSummary = summarizeRunFailureForIssueComment(input.latestRun);
return [
"Paperclip stopped automatic stranded-work recovery for this recovery issue.",
"",
`- Recovery issue: ${issueUiLink({ identifier: input.issue.identifier, id: input.issue.id }, input.prefix)}`,
`- Previous status: \`${input.previousStatus}\``,
`- Latest run: ${runLink}`,
`- Latest run status: \`${input.latestRun?.status ?? "unknown"}\``,
`- Retry reason: \`${retryReason}\``,
failureSummary ? `- Failure: ${failureSummary.trim()}` : "- Failure: none recorded",
"- Guard: recovery issues do not create nested `stranded_issue_recovery` issues.",
"",
"Next action: the current recovery owner should inspect the failed run evidence, restore a live execution path or record the manual resolution, then move this recovery issue out of `blocked`.",
].join("\n");
}
async function escalateStrandedRecoveryIssueInPlace(input: {
issue: typeof issues.$inferSelect;
previousStatus: "todo" | "in_progress";
latestRun: LatestIssueRun;
}) {
const updated = await issuesSvc.update(input.issue.id, { status: "blocked" });
if (!updated) return null;
const prefix = await getCompanyIssuePrefix(input.issue.companyId);
await issuesSvc.addComment(
input.issue.id,
buildRecoveryIssueInPlaceEscalationComment({
issue: input.issue,
previousStatus: input.previousStatus,
latestRun: input.latestRun,
prefix,
}),
{},
);
await logActivity(db, {
companyId: input.issue.companyId,
actorType: "system",
actorId: "system",
agentId: null,
runId: null,
action: "issue.updated",
entityType: "issue",
entityId: input.issue.id,
details: {
identifier: input.issue.identifier,
status: "blocked",
previousStatus: input.previousStatus,
source: "recovery.reconcile_stranded_recovery_issue",
latestRunId: input.latestRun?.id ?? null,
latestRunStatus: input.latestRun?.status ?? null,
latestRunErrorCode: input.latestRun?.errorCode ?? null,
originKind: input.issue.originKind,
originId: input.issue.originId,
},
});
return updated;
}
async function existingBlockerIssueIds(companyId: string, issueId: string) {
return db
.select({ blockerIssueId: issueRelations.issueId })
@ -1357,6 +1450,14 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
latestRun: LatestIssueRun;
comment: string;
}) {
if (isStrandedIssueRecoveryIssue(input.issue)) {
return escalateStrandedRecoveryIssueInPlace({
issue: input.issue,
previousStatus: input.previousStatus,
latestRun: input.latestRun,
});
}
const recoveryIssue = await ensureStrandedIssueRecoveryIssue({
issue: input.issue,
previousStatus: input.previousStatus,
@ -1457,6 +1558,21 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
}
const latestRun = await getLatestIssueRun(issue.companyId, issue.id);
if (isStrandedIssueRecoveryIssue(issue) && isUnsuccessfulTerminalIssueRun(latestRun)) {
const updated = await escalateStrandedRecoveryIssueInPlace({
issue,
previousStatus: issue.status as "todo" | "in_progress",
latestRun,
});
if (updated) {
result.escalated += 1;
result.issueIds.push(issue.id);
} else {
result.skipped += 1;
}
continue;
}
if (issue.status === "todo") {
if (!latestRun || latestRun.status === "succeeded") {
result.skipped += 1;