Add issue controls and retry-now recovery (#5426)

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - Issue operators need clear controls for execution settings, model
overrides, and recovery retries
> - Existing issue properties hid useful adapter override state and did
not expose a board-triggered retry for scheduled heartbeat recovery
> - Scheduled retries also need to respect the same safety gates as
normal execution instead of bypassing budget, review, pause, dependency,
or terminal-state checks
> - This pull request adds the issue property controls and retry-now
surfaces together because they share the issue details/properties UI
> - The benefit is that operators can inspect and adjust issue execution
settings and safely trigger pending scheduled recovery without hidden
control-plane behavior

## What Changed

- Adds editable issue assignee model override controls in
`IssueProperties`, with focused coverage.
- Removes the stale workspace tasks link from issue properties.
- Adds a scheduled retry `retry-now` backend path and shared response
types.
- Adds main-pane and properties-pane scheduled retry UI, backed by a
shared `useRetryNowMutation` hook.
- Adds suppression coverage for budget hard stops, review participant
changes, subtree pause holds, unresolved blockers, terminal issues, and
company scoping.
- Updates the `IssueProperties` test harness with toast actions required
by the retry-now hook.

## Verification

- `pnpm exec vitest run ui/src/components/IssueProperties.test.tsx
ui/src/components/IssueScheduledRetryCard.test.tsx` — 31 passed.
- `pnpm exec vitest run
server/src/__tests__/issue-scheduled-retry-routes.test.ts` — exited 0,
but this host skipped the embedded Postgres route tests with: `Postgres
init script exited with code null. Please check the logs for extra info.
The data directory might already exist.`
- Pairwise merge check against the assigned-backlog PR branch completed
without conflicts via `git merge --no-commit --no-ff` in a temporary
worktree.

### Visual verification screenshots

Storybook story: `Product/Issue Scheduled retry surfaces /
ScheduledRetrySurfaces`.

![Scheduled retry card and issue properties rows -
desktop](https://raw.githubusercontent.com/paperclipai/paperclip/62fb566f357312b43b9162af02252d0175530a8f/docs/assets/pr-5426/scheduled-retry-story-desktop.png)

![Scheduled retry card and issue properties rows -
mobile](https://raw.githubusercontent.com/paperclipai/paperclip/62fb566f357312b43b9162af02252d0175530a8f/docs/assets/pr-5426/scheduled-retry-story-mobile.png)

## Risks

- Medium: this touches issue execution/retry behavior, so CI should run
the embedded Postgres route tests on a host that can initialize
Postgres.
- Low-to-medium UI risk around duplicated retry-now entry points; both
surfaces share one mutation hook to keep behavior consistent.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) and discuss the
change in `#dev` before opening the PR. Feature PRs that overlap with
planned core work may need to be redirected. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex coding agent, GPT-5 model family (`gpt-5`), tool-enabled
Paperclip heartbeat environment. Context window and internal reasoning
mode are not exposed by the runtime.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Dotta 2026-05-07 12:23:13 -05:00 committed by GitHub
parent d0e9cc76f2
commit 772fc92619
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 2269 additions and 117 deletions

View file

@ -4712,6 +4712,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
issueId: string | null;
details: Record<string, unknown>;
};
// The subset of ScheduledRetryGate variants whose `allowed` flag is false,
// i.e. the gate results that describe why a scheduled retry was blocked.
type BlockedScheduledRetryGate = Extract<ScheduledRetryGate, { allowed: false }>;
async function evaluateScheduledRetryGate(input: {
run: typeof heartbeatRuns.$inferSelect;
@ -4960,6 +4961,111 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
return cancelled;
}
/**
 * Tries to move one `scheduled_retry` run into the `queued` state.
 *
 * The run is handed to `cancelScheduledRetryForGate` instead of being promoted
 * when its agent can no longer be loaded or when `evaluateScheduledRetryGate`
 * reports a blocked gate. The one exception is an `issue_not_found` gate on a
 * run that is not a max-turn continuation, which is still promoted.
 *
 * @param dueRun The heartbeat run row that is expected to be in `scheduled_retry`.
 * @param now Timestamp used for the `scheduledRetryAt <= now` check and for `updatedAt`.
 * @returns `promoted` with the updated row, `gate_suppressed` with the row returned by
 *   the cancel helper plus the gate's reason and error code, or `not_promoted` when
 *   neither the cancel helper nor the conditional update produced a row.
 */
async function promoteScheduledRetryRun(
  dueRun: typeof heartbeatRuns.$inferSelect,
  now: Date,
): Promise<
  | { outcome: "promoted"; run: typeof heartbeatRuns.$inferSelect }
  | {
      outcome: "gate_suppressed";
      run: typeof heartbeatRuns.$inferSelect;
      reason: string;
      errorCode: BlockedScheduledRetryGate["errorCode"];
    }
  | { outcome: "not_promoted"; run: typeof heartbeatRuns.$inferSelect | null }
> {
  // Turns the result of a cancel attempt into this function's return shape.
  const settleSuppression = (
    cancelledRun: Awaited<ReturnType<typeof cancelScheduledRetryForGate>>,
    blocked: { reason: string; errorCode: BlockedScheduledRetryGate["errorCode"] },
  ) => {
    if (!cancelledRun) {
      return { outcome: "not_promoted" as const, run: null };
    }
    return {
      outcome: "gate_suppressed" as const,
      run: cancelledRun,
      reason: blocked.reason,
      errorCode: blocked.errorCode,
    };
  };

  const agent = await getAgent(dueRun.agentId);
  if (!agent) {
    const missingAgentGate = {
      allowed: false as const,
      reason: "Scheduled retry suppressed because the agent no longer exists",
      errorCode: "agent_not_invokable" as const,
      issueId: readNonEmptyString(parseObject(dueRun.contextSnapshot).issueId),
      details: { agentId: dueRun.agentId },
    };
    const cancelledRun = await cancelScheduledRetryForGate(dueRun, missingAgentGate, now);
    return settleSuppression(cancelledRun, missingAgentGate);
  }

  const retryReason = dueRun.scheduledRetryReason;
  const gate = await evaluateScheduledRetryGate({
    run: dueRun,
    agent,
    contextSnapshot: parseObject(dueRun.contextSnapshot),
    retryReason,
    enforceIssueExecutionLock: retryReason === MAX_TURN_CONTINUATION_RETRY_REASON,
  });

  if (!gate.allowed) {
    // Preserve legacy transient retry behavior for runs that only carry a
    // loose task context rather than a persisted issue row: those fall
    // through to promotion even though the gate could not find an issue.
    const isLegacyLooseContext =
      gate.errorCode === "issue_not_found" &&
      dueRun.scheduledRetryReason !== MAX_TURN_CONTINUATION_RETRY_REASON;
    if (!isLegacyLooseContext) {
      const cancelledRun = await cancelScheduledRetryForGate(dueRun, gate, now);
      return settleSuppression(cancelledRun, gate);
    }
  }

  // The status and due-time predicates make the update a no-op when the row
  // is no longer a due scheduled retry.
  const promotedRows = await db
    .update(heartbeatRuns)
    .set({ status: "queued", updatedAt: now })
    .where(
      and(
        eq(heartbeatRuns.id, dueRun.id),
        eq(heartbeatRuns.status, "scheduled_retry"),
        lte(heartbeatRuns.scheduledRetryAt, now),
      ),
    )
    .returning();
  const promoted = promotedRows[0] ?? null;
  if (!promoted) {
    return { outcome: "not_promoted", run: null };
  }

  const eventSeq = await nextRunEventSeq(promoted.id);
  const scheduledRetryAtIso = promoted.scheduledRetryAt
    ? new Date(promoted.scheduledRetryAt).toISOString()
    : null;
  await appendRunEvent(promoted, eventSeq, {
    eventType: "lifecycle",
    stream: "system",
    level: "info",
    message: "Scheduled retry became due and was promoted to the queued run pool",
    payload: {
      scheduledRetryAttempt: promoted.scheduledRetryAttempt,
      scheduledRetryAt: scheduledRetryAtIso,
      scheduledRetryReason: promoted.scheduledRetryReason,
    },
  });

  publishLiveEvent({
    companyId: promoted.companyId,
    type: "heartbeat.run.queued",
    payload: {
      runId: promoted.id,
      agentId: promoted.agentId,
      invocationSource: promoted.invocationSource,
      triggerDetail: promoted.triggerDetail,
      wakeupRequestId: promoted.wakeupRequestId,
    },
  });

  return { outcome: "promoted", run: promoted };
}
async function scheduleBoundedRetryForRun(
run: typeof heartbeatRuns.$inferSelect,
agent: typeof agents.$inferSelect,
@ -5384,81 +5490,10 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
const promotedRunIds: string[] = [];
for (const dueRun of dueRuns) {
const agent = await getAgent(dueRun.agentId);
if (!agent) {
await cancelScheduledRetryForGate(dueRun, {
allowed: false,
reason: "Scheduled retry suppressed because the agent no longer exists",
errorCode: "agent_not_invokable",
issueId: readNonEmptyString(parseObject(dueRun.contextSnapshot).issueId),
details: { agentId: dueRun.agentId },
}, now);
continue;
const result = await promoteScheduledRetryRun(dueRun, now);
if (result.outcome === "promoted") {
promotedRunIds.push(result.run.id);
}
const contextSnapshot = parseObject(dueRun.contextSnapshot);
const gate = await evaluateScheduledRetryGate({
run: dueRun,
agent,
contextSnapshot,
retryReason: dueRun.scheduledRetryReason,
enforceIssueExecutionLock: dueRun.scheduledRetryReason === MAX_TURN_CONTINUATION_RETRY_REASON,
});
if (!gate.allowed) {
if (
gate.errorCode === "issue_not_found" &&
dueRun.scheduledRetryReason !== MAX_TURN_CONTINUATION_RETRY_REASON
) {
// Preserve legacy transient retry behavior for runs that only carry a
// loose task context rather than a persisted issue row.
} else {
await cancelScheduledRetryForGate(dueRun, gate, now);
continue;
}
}
const promoted = await db
.update(heartbeatRuns)
.set({
status: "queued",
updatedAt: now,
})
.where(
and(
eq(heartbeatRuns.id, dueRun.id),
eq(heartbeatRuns.status, "scheduled_retry"),
lte(heartbeatRuns.scheduledRetryAt, now),
),
)
.returning()
.then((rows) => rows[0] ?? null);
if (!promoted) continue;
promotedRunIds.push(promoted.id);
await appendRunEvent(promoted, await nextRunEventSeq(promoted.id), {
eventType: "lifecycle",
stream: "system",
level: "info",
message: "Scheduled retry became due and was promoted to the queued run pool",
payload: {
scheduledRetryAttempt: promoted.scheduledRetryAttempt,
scheduledRetryAt: promoted.scheduledRetryAt ? new Date(promoted.scheduledRetryAt).toISOString() : null,
scheduledRetryReason: promoted.scheduledRetryReason,
},
});
publishLiveEvent({
companyId: promoted.companyId,
type: "heartbeat.run.queued",
payload: {
runId: promoted.id,
agentId: promoted.agentId,
invocationSource: promoted.invocationSource,
triggerDetail: promoted.triggerDetail,
wakeupRequestId: promoted.wakeupRequestId,
},
});
}
return {
@ -5467,6 +5502,182 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
};
}
/**
 * Finds the most recently updated retry run for an issue.
 *
 * Only runs that belong to `companyId`, have one of the requested `statuses`,
 * carry `issueId` in their context snapshot, and have a non-null
 * `retryOfRunId` are considered. Ties on `updatedAt` are broken by
 * `createdAt`, then `id`, all descending.
 *
 * @returns The run row together with its agent's name, or `null` when no run
 *   matches or `statuses` is empty.
 */
async function getIssueRetryRun(
  companyId: string,
  issueId: string,
  statuses: Array<"scheduled_retry" | "queued" | "running" | "cancelled">,
) {
  if (statuses.length === 0) return null;

  const isRetryRunForIssue = and(
    eq(heartbeatRuns.companyId, companyId),
    inArray(heartbeatRuns.status, statuses),
    sql`${heartbeatRuns.contextSnapshot} ->> 'issueId' = ${issueId}`,
    sql`${heartbeatRuns.retryOfRunId} is not null`,
  );

  const newestFirst = [
    desc(heartbeatRuns.updatedAt),
    desc(heartbeatRuns.createdAt),
    desc(heartbeatRuns.id),
  ] as const;

  const matches = await db
    .select({ run: heartbeatRuns, agentName: agents.name })
    .from(heartbeatRuns)
    .innerJoin(agents, eq(heartbeatRuns.agentId, agents.id))
    .where(isRetryRunForIssue)
    .orderBy(...newestFirst)
    .limit(1);

  return matches[0] ?? null;
}
/**
 * Projects a run row plus its agent name onto the flat summary shape returned
 * to retry-now callers.
 *
 * The run's `status` is asserted (not validated) to be one of the four retry
 * statuses, so callers are expected to pass rows already filtered to them.
 */
function summarizeIssueScheduledRetryRun(
  row: { run: typeof heartbeatRuns.$inferSelect; agentName: string | null },
) {
  const { run, agentName } = row;
  const status = run.status as "scheduled_retry" | "queued" | "running" | "cancelled";
  const {
    id: runId,
    agentId,
    retryOfRunId,
    scheduledRetryAt,
    scheduledRetryAttempt,
    scheduledRetryReason,
    error,
    errorCode,
  } = run;

  return {
    runId,
    status,
    agentId,
    agentName,
    retryOfRunId,
    scheduledRetryAt,
    scheduledRetryAttempt,
    scheduledRetryReason,
    error,
    errorCode,
  };
}
/**
 * Pulls an issue's pending scheduled retry forward to "now" and immediately
 * attempts to promote it through `promoteScheduledRetryRun`, so the same gate
 * checks apply as for a retry that became due on its own.
 *
 * Steps:
 *  1. Load the issue (throws `notFound` when it does not exist).
 *  2. Find the issue's `scheduled_retry` run; if none, report whether a retry
 *     is already `queued`/`running` or whether nothing is pending.
 *  3. In one transaction, rewrite `scheduledRetryAt` and stamp the request
 *     metadata on the run's context snapshot and, when present, on the linked
 *     wakeup request payload.
 *  4. Record a lifecycle event and attempt promotion.
 *
 * @param input.issueId Issue whose scheduled retry should run now.
 * @param input.actor Optional actor recorded on the run context and lifecycle event.
 * @param input.now Optional clock override; defaults to the current time.
 * @returns An object with `outcome` (`promoted`, `gate_suppressed`,
 *   `already_promoted`, or `no_scheduled_retry`), a human-readable `message`,
 *   and a `scheduledRetry` summary (or `null` for `no_scheduled_retry`).
 * @throws The `notFound` error when the issue does not exist.
 */
async function retryScheduledRetryNow(input: {
  issueId: string;
  actor?: { actorType?: "user" | "agent" | "system"; actorId?: string | null };
  now?: Date;
}) {
  const now = input.now ?? new Date();
  const requestedAtIso = now.toISOString();

  const issue = await db
    .select({ id: issues.id, companyId: issues.companyId })
    .from(issues)
    .where(eq(issues.id, input.issueId))
    .then((rows) => rows[0] ?? null);
  if (!issue) throw notFound("Issue not found");
  const { id: issueId, companyId } = issue;

  const noScheduledRetry = () => ({
    outcome: "no_scheduled_retry" as const,
    message: "No live scheduled retry exists for this issue",
    scheduledRetry: null,
  });

  // Shared answer for "there is no run left in scheduled_retry": either a
  // retry for this issue is already queued/running, or nothing is pending.
  const resolveWithoutScheduledRetry = async () => {
    const alreadyPromoted = await getIssueRetryRun(companyId, issueId, ["queued", "running"]);
    if (!alreadyPromoted) return noScheduledRetry();
    return {
      outcome: "already_promoted" as const,
      message: "Scheduled retry was already promoted",
      scheduledRetry: summarizeIssueScheduledRetryRun(alreadyPromoted),
    };
  };

  const scheduled = await getIssueRetryRun(companyId, issueId, ["scheduled_retry"]);
  if (!scheduled) return resolveWithoutScheduledRetry();

  const contextSnapshot = {
    ...parseObject(scheduled.run.contextSnapshot),
    scheduledRetryAt: requestedAtIso,
    retryNowRequestedAt: requestedAtIso,
    retryNowRequestedByActorType: input.actor?.actorType ?? null,
    retryNowRequestedByActorId: input.actor?.actorId ?? null,
  };

  const updated = await db.transaction(async (tx) => {
    // The status predicate makes this a no-op when the run left
    // scheduled_retry between the lookup above and this update.
    const row = await tx
      .update(heartbeatRuns)
      .set({
        scheduledRetryAt: now,
        contextSnapshot,
        updatedAt: now,
      })
      .where(and(eq(heartbeatRuns.id, scheduled.run.id), eq(heartbeatRuns.status, "scheduled_retry")))
      .returning()
      .then((rows) => rows[0] ?? null);
    if (!row) return null;

    if (row.wakeupRequestId) {
      // Mirror the new schedule onto the linked wakeup request payload.
      const existingPayload = await tx
        .select({ payload: agentWakeupRequests.payload })
        .from(agentWakeupRequests)
        .where(eq(agentWakeupRequests.id, row.wakeupRequestId))
        .then((rows) => rows[0]?.payload ?? null);
      const wakeupPayload = {
        ...parseObject(existingPayload),
        scheduledRetryAt: requestedAtIso,
        retryNowRequestedAt: requestedAtIso,
      };
      await tx
        .update(agentWakeupRequests)
        .set({
          payload: wakeupPayload,
          updatedAt: now,
        })
        .where(eq(agentWakeupRequests.id, row.wakeupRequestId));
    }
    return row;
  });
  if (!updated) return resolveWithoutScheduledRetry();

  await appendRunEvent(updated, await nextRunEventSeq(updated.id), {
    eventType: "lifecycle",
    stream: "system",
    level: "info",
    message: "Scheduled retry was requested to run now",
    payload: {
      issueId,
      scheduledRetryAttempt: updated.scheduledRetryAttempt,
      scheduledRetryAt: updated.scheduledRetryAt ? new Date(updated.scheduledRetryAt).toISOString() : null,
      scheduledRetryReason: updated.scheduledRetryReason,
      requestedByActorType: input.actor?.actorType ?? null,
      requestedByActorId: input.actor?.actorId ?? null,
    },
  });

  const promotion = await promoteScheduledRetryRun(updated, now);

  // Re-read the issue's latest retry run so the summary reflects its state
  // after promotion or cancellation.
  // NOTE(review): this lookup is keyed by issue, not by run id, so it can in
  // principle return a different retry run than `updated` — confirm intent.
  const latest = await getIssueRetryRun(companyId, issueId, ["queued", "running", "cancelled"]);
  const scheduledRetry = latest
    ? summarizeIssueScheduledRetryRun(latest)
    : summarizeIssueScheduledRetryRun({ run: promotion.run ?? updated, agentName: scheduled.agentName });

  if (promotion.outcome === "promoted") {
    return {
      outcome: "promoted" as const,
      message: "Scheduled retry was promoted to the queued run pool",
      scheduledRetry,
    };
  }
  if (promotion.outcome === "gate_suppressed") {
    return {
      outcome: "gate_suppressed" as const,
      message: promotion.reason,
      scheduledRetry,
    };
  }

  // `not_promoted`: promotion found nothing to promote or cancel. If the
  // re-read shows this very run as cancelled, it was not promoted, so do not
  // claim "already promoted".
  if (latest !== null && latest.run.id === updated.id && latest.run.status === "cancelled") {
    return noScheduledRetry();
  }
  return {
    outcome: "already_promoted" as const,
    message: "Scheduled retry was already promoted",
    scheduledRetry,
  };
}
function parseHeartbeatPolicy(agent: typeof agents.$inferSelect) {
const runtimeConfig = parseObject(agent.runtimeConfig);
const heartbeat = parseObject(runtimeConfig.heartbeat);
@ -9383,6 +9594,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
reapOrphanedRuns,
promoteDueScheduledRetries,
retryScheduledRetryNow,
resumeQueuedRuns,

View file

@ -156,6 +156,19 @@ type IssueActiveRunRow = {
finishedAt: Date | null;
createdAt: Date;
};
// Flat view of a heartbeat retry run attached to an issue, as returned by
// getCurrentScheduledRetryForIssue.
type IssueScheduledRetryRow = {
// Id of the heartbeat run that represents the retry.
runId: string;
// Lifecycle state of the retry run; getCurrentScheduledRetryForIssue only
// ever returns "scheduled_retry".
status: "scheduled_retry" | "queued" | "running" | "cancelled";
// Agent that owns the run, and its display name from the joined agents row.
agentId: string;
agentName: string | null;
// Value of the run's retryOfRunId column.
retryOfRunId: string | null;
// When the retry is scheduled to become due, plus its attempt counter and reason.
scheduledRetryAt: Date | null;
scheduledRetryAttempt: number;
scheduledRetryReason: string | null;
// Not selected by getCurrentScheduledRetryForIssue; presumably filled in by
// other producers of this shape — TODO confirm.
retryExhaustedReason?: string | null;
// Error text and code copied from the run row.
error?: string | null;
errorCode?: string | null;
};
type IssueWithLabels = IssueRow & { labels: IssueLabelRow[]; labelIds: string[] };
type IssueWithLabelsAndRun = IssueWithLabels & { activeRun: IssueActiveRunRow | null };
type IssueUserCommentStats = {
@ -1686,6 +1699,36 @@ export function issueService(db: Db) {
return enriched;
}
/**
 * Returns the next pending scheduled retry for an issue, if any.
 *
 * Considers heartbeat runs of `companyId` that are in `scheduled_retry` and
 * whose context snapshot references `issueId`, and picks the one with the
 * earliest `scheduledRetryAt` (ties broken by `createdAt`, then `id`).
 *
 * @returns The retry summary with `status` fixed to `"scheduled_retry"`, or
 *   `null` when the issue has no pending scheduled retry.
 */
async function getCurrentScheduledRetryForIssue(issueId: string, companyId: string): Promise<IssueScheduledRetryRow | null> {
  const pendingRetryForIssue = and(
    eq(heartbeatRuns.companyId, companyId),
    eq(heartbeatRuns.status, "scheduled_retry"),
    sql`${heartbeatRuns.contextSnapshot} ->> 'issueId' = ${issueId}`,
  );

  const candidates = await db
    .select({
      runId: heartbeatRuns.id,
      status: heartbeatRuns.status,
      agentId: heartbeatRuns.agentId,
      agentName: agents.name,
      retryOfRunId: heartbeatRuns.retryOfRunId,
      scheduledRetryAt: heartbeatRuns.scheduledRetryAt,
      scheduledRetryAttempt: heartbeatRuns.scheduledRetryAttempt,
      scheduledRetryReason: heartbeatRuns.scheduledRetryReason,
      error: heartbeatRuns.error,
      errorCode: heartbeatRuns.errorCode,
    })
    .from(heartbeatRuns)
    .innerJoin(agents, eq(heartbeatRuns.agentId, agents.id))
    .where(pendingRetryForIssue)
    .orderBy(asc(heartbeatRuns.scheduledRetryAt), asc(heartbeatRuns.createdAt), asc(heartbeatRuns.id))
    .limit(1);

  const earliest = candidates[0] ?? null;
  if (!earliest) return null;

  // Overwrite the column's status type with the literal the filter guarantees.
  return { ...earliest, status: "scheduled_retry" };
}
function deriveIssueCommentAuthorType(comment: {
authorType?: string | null;
authorAgentId?: string | null;
@ -2502,6 +2545,16 @@ export function issueService(db: Db) {
return getIssueByIdentifier(identifier);
},
// Resolves the issue's company, then returns its next pending scheduled
// retry (or null). Throws notFound when the issue does not exist.
getCurrentScheduledRetry: async (issueId: string) => {
  const matchingIssues = await db
    .select({ id: issues.id, companyId: issues.companyId })
    .from(issues)
    .where(eq(issues.id, issueId));
  const issue = matchingIssues[0] ?? null;
  if (!issue) {
    throw notFound("Issue not found");
  }
  const { id, companyId } = issue;
  return getCurrentScheduledRetryForIssue(id, companyId);
},
getRelationSummaries: async (issueId: string) => {
const issue = await db
.select({ id: issues.id, companyId: issues.companyId })