Add issue controls and retry-now recovery (#5426)

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - Issue operators need clear controls for execution settings, model
overrides, and recovery retries
> - Existing issue properties hid useful adapter override state and did
not expose a board-triggered retry for scheduled heartbeat recovery
> - Scheduled retries also need to respect the same safety gates as
normal execution instead of bypassing budget, review, pause, dependency,
or terminal-state checks
> - This pull request adds the issue property controls and retry-now
surfaces together because they share the issue details/properties UI
> - The benefit is that operators can inspect and adjust issue execution
settings and safely trigger pending scheduled recovery without hidden
control-plane behavior

## What Changed

- Adds editable issue assignee model override controls in
`IssueProperties`, with focused coverage.
- Removes the stale workspace tasks link from issue properties.
- Adds a scheduled retry `retry-now` backend path and shared response
types.
- Adds main-pane and properties-pane scheduled retry UI, backed by a
shared `useRetryNowMutation` hook.
- Adds suppression coverage for budget hard stops, review participant
changes, subtree pause holds, unresolved blockers, terminal issues, and
company scoping.
- Updates the `IssueProperties` test harness with toast actions required
by the retry-now hook.

## Verification

- `pnpm exec vitest run ui/src/components/IssueProperties.test.tsx
ui/src/components/IssueScheduledRetryCard.test.tsx` — 31 passed.
- `pnpm exec vitest run
server/src/__tests__/issue-scheduled-retry-routes.test.ts` — exited 0,
but this host skipped the embedded Postgres route tests with: `Postgres
init script exited with code null. Please check the logs for extra info.
The data directory might already exist.`
- Pairwise merge check against the assigned-backlog PR branch completed
without conflicts via `git merge --no-commit --no-ff` in a temporary
worktree.

### Visual verification screenshots

Storybook story: `Product/Issue Scheduled retry surfaces /
ScheduledRetrySurfaces`.

![Scheduled retry card and issue properties rows -
desktop](https://raw.githubusercontent.com/paperclipai/paperclip/62fb566f357312b43b9162af02252d0175530a8f/docs/assets/pr-5426/scheduled-retry-story-desktop.png)

![Scheduled retry card and issue properties rows -
mobile](https://raw.githubusercontent.com/paperclipai/paperclip/62fb566f357312b43b9162af02252d0175530a8f/docs/assets/pr-5426/scheduled-retry-story-mobile.png)

## Risks

- Medium: this touches issue execution/retry behavior, so CI should run
the embedded Postgres route tests on a host that can initialize
Postgres.
- Low-to-medium UI risk around duplicated retry-now entry points; both
surfaces share one mutation hook to keep behavior consistent.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex coding agent, GPT-5 model family (`gpt-5`), tool-enabled
Paperclip heartbeat environment. Context window and internal reasoning
mode are not exposed by the runtime.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Dotta 2026-05-07 12:23:13 -05:00 committed by GitHub
parent d0e9cc76f2
commit 772fc92619
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 2269 additions and 117 deletions

View file

@ -0,0 +1,518 @@
import { randomUUID } from "node:crypto";
import express from "express";
import request from "supertest";
import { and, eq } from "drizzle-orm";
import { afterAll, afterEach, beforeAll, describe, expect, it } from "vitest";
import {
activityLog,
agents,
agentWakeupRequests,
companies,
createDb,
heartbeatRunEvents,
heartbeatRuns,
issueComments,
issueRelations,
issueTreeHolds,
issues,
} from "@paperclipai/db";
import {
getEmbeddedPostgresTestSupport,
startEmbeddedPostgresTestDatabase,
} from "./helpers/embedded-postgres.js";
import { errorHandler } from "../middleware/index.js";
import { issueRoutes } from "../routes/issues.js";
const embeddedPostgresSupport = await getEmbeddedPostgresTestSupport();
const describeEmbeddedPostgres = embeddedPostgresSupport.supported ? describe : describe.skip;
if (!embeddedPostgresSupport.supported) {
console.warn(
`Skipping embedded Postgres scheduled retry route tests on this host: ${
embeddedPostgresSupport.reason ?? "unsupported environment"
}`,
);
}
describeEmbeddedPostgres("issue scheduled retry routes", () => {
let db!: ReturnType<typeof createDb>;
let tempDb: Awaited<ReturnType<typeof startEmbeddedPostgresTestDatabase>> | null = null;
beforeAll(async () => {
tempDb = await startEmbeddedPostgresTestDatabase("paperclip-issue-scheduled-retry-routes-");
db = createDb(tempDb.connectionString);
}, 20_000);
afterEach(async () => {
await db.delete(issueComments);
await db.delete(issueRelations);
await db.delete(issueTreeHolds);
await db.delete(activityLog);
await db.delete(issues);
await db.delete(heartbeatRunEvents);
await db.delete(heartbeatRuns);
await db.delete(agentWakeupRequests);
await db.delete(agents);
await db.delete(companies);
});
afterAll(async () => {
await tempDb?.cleanup();
});
function createApp(actor: Express.Request["actor"]) {
const app = express();
app.use(express.json());
app.use((req, _res, next) => {
req.actor = actor;
next();
});
app.use("/api", issueRoutes(db, {} as any));
app.use(errorHandler);
return app;
}
function boardActor(companyId: string): Express.Request["actor"] {
return {
type: "board",
userId: "board-user",
companyIds: [companyId],
memberships: [{ companyId, membershipRole: "admin", status: "active" }],
isInstanceAdmin: false,
source: "session",
};
}
function agentActor(companyId: string, agentId: string): Express.Request["actor"] {
return {
type: "agent",
agentId,
companyId,
runId: randomUUID(),
source: "agent_jwt",
};
}
async function seedIssueWithRetry(input: {
agentStatus?: "active" | "paused";
retryStatus?: "scheduled_retry" | "queued" | "running";
issueStatus?: "in_progress" | "todo" | "done" | "cancelled";
} = {}) {
const companyId = randomUUID();
const agentId = randomUUID();
const issueId = randomUUID();
const sourceRunId = randomUUID();
const retryRunId = randomUUID();
const wakeupRequestId = randomUUID();
const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;
const now = new Date("2026-05-06T18:00:00.000Z");
const scheduledRetryAt = new Date("2026-05-06T19:00:00.000Z");
await db.insert(companies).values({
id: companyId,
name: "Paperclip",
issuePrefix,
requireBoardApprovalForNewAgents: false,
});
await db.insert(agents).values({
id: agentId,
companyId,
name: "CodexCoder",
role: "engineer",
status: input.agentStatus ?? "active",
adapterType: "codex_local",
adapterConfig: {},
runtimeConfig: {
heartbeat: {
wakeOnDemand: true,
maxConcurrentRuns: 1,
},
},
permissions: {},
});
await db.insert(heartbeatRuns).values({
id: sourceRunId,
companyId,
agentId,
invocationSource: "assignment",
triggerDetail: "system",
status: "failed",
error: "transient upstream error",
errorCode: "adapter_failed",
finishedAt: now,
contextSnapshot: {
issueId,
wakeReason: "issue_assigned",
},
updatedAt: now,
createdAt: now,
});
await db.insert(agentWakeupRequests).values({
id: wakeupRequestId,
companyId,
agentId,
source: "automation",
triggerDetail: "system",
reason: "bounded_transient_heartbeat_retry",
payload: {
issueId,
retryOfRunId: sourceRunId,
scheduledRetryAt: scheduledRetryAt.toISOString(),
},
status: "queued",
});
await db.insert(heartbeatRuns).values({
id: retryRunId,
companyId,
agentId,
invocationSource: "automation",
triggerDetail: "system",
status: input.retryStatus ?? "scheduled_retry",
wakeupRequestId,
retryOfRunId: sourceRunId,
scheduledRetryAt,
scheduledRetryAttempt: 2,
scheduledRetryReason: "transient_failure",
contextSnapshot: {
issueId,
wakeReason: "bounded_transient_heartbeat_retry",
retryOfRunId: sourceRunId,
scheduledRetryAt: scheduledRetryAt.toISOString(),
scheduledRetryAttempt: 2,
retryReason: "transient_failure",
},
updatedAt: now,
createdAt: now,
});
await db
.update(agentWakeupRequests)
.set({ runId: retryRunId })
.where(eq(agentWakeupRequests.id, wakeupRequestId));
await db.insert(issues).values({
id: issueId,
companyId,
title: "Retryable issue",
status: input.issueStatus ?? "in_progress",
priority: "medium",
assigneeAgentId: agentId,
executionRunId: retryRunId,
executionAgentNameKey: "codexcoder",
executionLockedAt: now,
issueNumber: 1,
identifier: `${issuePrefix}-1`,
});
return { companyId, agentId, issueId, sourceRunId, retryRunId, scheduledRetryAt };
}
it("surfaces the current scheduled retry in the issue read model", async () => {
const { companyId, issueId, agentId, sourceRunId, retryRunId, scheduledRetryAt } = await seedIssueWithRetry();
const res = await request(createApp(boardActor(companyId))).get(`/api/issues/${issueId}`);
expect(res.status, JSON.stringify(res.body)).toBe(200);
expect(res.body.scheduledRetry).toMatchObject({
runId: retryRunId,
status: "scheduled_retry",
agentId,
agentName: "CodexCoder",
retryOfRunId: sourceRunId,
scheduledRetryAttempt: 2,
scheduledRetryReason: "transient_failure",
});
expect(res.body.scheduledRetry.scheduledRetryAt).toBe(scheduledRetryAt.toISOString());
});
it("promotes the existing scheduled retry and treats duplicate clicks as idempotent", async () => {
const { companyId, issueId, retryRunId } = await seedIssueWithRetry();
const app = createApp(boardActor(companyId));
const first = await request(app).post(`/api/issues/${issueId}/scheduled-retry/retry-now`).send({});
expect(first.status, JSON.stringify(first.body)).toBe(200);
expect(first.body).toMatchObject({
outcome: "promoted",
scheduledRetry: {
runId: retryRunId,
status: "queued",
},
});
const second = await request(app).post(`/api/issues/${issueId}/scheduled-retry/retry-now`).send({});
expect(second.status, JSON.stringify(second.body)).toBe(200);
expect(second.body).toMatchObject({
outcome: "already_promoted",
scheduledRetry: {
runId: retryRunId,
status: "queued",
},
});
const retryRuns = await db
.select({ id: heartbeatRuns.id, status: heartbeatRuns.status })
.from(heartbeatRuns)
.where(and(eq(heartbeatRuns.retryOfRunId, first.body.scheduledRetry.retryOfRunId), eq(heartbeatRuns.companyId, companyId)));
expect(retryRuns).toHaveLength(1);
expect(retryRuns[0]).toMatchObject({ id: retryRunId, status: "queued" });
});
it("returns a clear no-op response when there is no scheduled retry", async () => {
const companyId = randomUUID();
const issueId = randomUUID();
await db.insert(companies).values({
id: companyId,
name: "Paperclip",
issuePrefix: "NONE",
requireBoardApprovalForNewAgents: false,
});
await db.insert(issues).values({
id: issueId,
companyId,
title: "No retry",
status: "todo",
priority: "medium",
issueNumber: 1,
identifier: "NONE-1",
});
const res = await request(createApp(boardActor(companyId)))
.post(`/api/issues/${issueId}/scheduled-retry/retry-now`)
.send({});
expect(res.status, JSON.stringify(res.body)).toBe(200);
expect(res.body).toMatchObject({
outcome: "no_scheduled_retry",
scheduledRetry: null,
});
});
it("reports already-promoted retries without creating another run", async () => {
const { companyId, issueId, retryRunId } = await seedIssueWithRetry({ retryStatus: "queued" });
const res = await request(createApp(boardActor(companyId)))
.post(`/api/issues/${issueId}/scheduled-retry/retry-now`)
.send({});
expect(res.status, JSON.stringify(res.body)).toBe(200);
expect(res.body).toMatchObject({
outcome: "already_promoted",
scheduledRetry: {
runId: retryRunId,
status: "queued",
},
});
});
it("uses normal promotion gates and records gate-suppressed retries", async () => {
const { companyId, issueId, retryRunId } = await seedIssueWithRetry({ agentStatus: "paused" });
const res = await request(createApp(boardActor(companyId)))
.post(`/api/issues/${issueId}/scheduled-retry/retry-now`)
.send({});
expect(res.status, JSON.stringify(res.body)).toBe(200);
expect(res.body).toMatchObject({
outcome: "gate_suppressed",
scheduledRetry: {
runId: retryRunId,
status: "cancelled",
errorCode: "agent_not_invokable",
},
});
const [run] = await db
.select({ status: heartbeatRuns.status, errorCode: heartbeatRuns.errorCode })
.from(heartbeatRuns)
.where(eq(heartbeatRuns.id, retryRunId));
expect(run).toEqual({ status: "cancelled", errorCode: "agent_not_invokable" });
const [activity] = await db
.select({ action: activityLog.action, entityId: activityLog.entityId, runId: activityLog.runId })
.from(activityLog)
.where(eq(activityLog.entityId, issueId));
expect(activity).toEqual({
action: "issue.scheduled_retry_retry_now",
entityId: issueId,
runId: retryRunId,
});
});
it("requires board access for retry-now", async () => {
const { companyId, agentId, issueId } = await seedIssueWithRetry();
const res = await request(createApp(agentActor(companyId, agentId)))
.post(`/api/issues/${issueId}/scheduled-retry/retry-now`)
.send({});
expect(res.status).toBe(403);
});
it("enforces company scoping for retry-now", async () => {
const { issueId } = await seedIssueWithRetry();
const res = await request(createApp(boardActor(randomUUID())))
.post(`/api/issues/${issueId}/scheduled-retry/retry-now`)
.send({});
expect(res.status).toBe(403);
});
it("suppresses retry-now when the issue is under a budget hard-stop", async () => {
const { companyId, agentId, issueId, retryRunId } = await seedIssueWithRetry();
await db
.update(agents)
.set({ status: "paused", pauseReason: "budget" })
.where(eq(agents.id, agentId));
const res = await request(createApp(boardActor(companyId)))
.post(`/api/issues/${issueId}/scheduled-retry/retry-now`)
.send({});
expect(res.status, JSON.stringify(res.body)).toBe(200);
expect(res.body).toMatchObject({
outcome: "gate_suppressed",
scheduledRetry: {
runId: retryRunId,
status: "cancelled",
errorCode: "budget_blocked",
},
});
});
it("suppresses retry-now when the issue is waiting on another review participant", async () => {
const { companyId, agentId, issueId, retryRunId } = await seedIssueWithRetry({ issueStatus: "in_progress" });
const reviewerAgentId = randomUUID();
await db.insert(agents).values({
id: reviewerAgentId,
companyId,
name: "ReviewerAgent",
role: "qa",
status: "active",
adapterType: "codex_local",
adapterConfig: {},
runtimeConfig: {
heartbeat: {
wakeOnDemand: true,
maxConcurrentRuns: 1,
},
},
permissions: {},
});
await db
.update(issues)
.set({
status: "in_review",
executionState: {
status: "pending",
currentStageId: randomUUID(),
currentStageIndex: 0,
currentStageType: "review",
currentParticipant: { type: "agent", agentId: reviewerAgentId, userId: null },
returnAssignee: { type: "agent", agentId, userId: null },
reviewRequest: null,
completedStageIds: [],
lastDecisionId: null,
lastDecisionOutcome: null,
},
})
.where(eq(issues.id, issueId));
const res = await request(createApp(boardActor(companyId)))
.post(`/api/issues/${issueId}/scheduled-retry/retry-now`)
.send({});
expect(res.status, JSON.stringify(res.body)).toBe(200);
expect(res.body).toMatchObject({
outcome: "gate_suppressed",
scheduledRetry: {
runId: retryRunId,
status: "cancelled",
errorCode: "issue_review_participant_changed",
},
});
});
it("suppresses retry-now when the issue is under an active subtree pause hold", async () => {
const { companyId, issueId, retryRunId } = await seedIssueWithRetry();
await db.insert(issueTreeHolds).values({
companyId,
rootIssueId: issueId,
mode: "pause",
status: "active",
reason: "manual pause for review",
releasePolicy: { strategy: "manual" },
});
const res = await request(createApp(boardActor(companyId)))
.post(`/api/issues/${issueId}/scheduled-retry/retry-now`)
.send({});
expect(res.status, JSON.stringify(res.body)).toBe(200);
expect(res.body).toMatchObject({
outcome: "gate_suppressed",
scheduledRetry: {
runId: retryRunId,
status: "cancelled",
errorCode: "issue_paused",
},
});
});
it("suppresses retry-now when unresolved blockers remain", async () => {
const { companyId, issueId, retryRunId } = await seedIssueWithRetry();
const blockerId = randomUUID();
await db.insert(issues).values({
id: blockerId,
companyId,
title: "Blocking task",
status: "todo",
priority: "medium",
issueNumber: 2,
identifier: "BLOCK-2",
});
await db.insert(issueRelations).values({
id: randomUUID(),
companyId,
issueId: blockerId,
relatedIssueId: issueId,
type: "blocks",
});
await db
.update(issues)
.set({ status: "blocked" })
.where(eq(issues.id, issueId));
const res = await request(createApp(boardActor(companyId)))
.post(`/api/issues/${issueId}/scheduled-retry/retry-now`)
.send({});
expect(res.status, JSON.stringify(res.body)).toBe(200);
expect(res.body).toMatchObject({
outcome: "gate_suppressed",
scheduledRetry: {
runId: retryRunId,
status: "cancelled",
errorCode: "issue_dependencies_blocked",
},
});
});
it("suppresses retry-now when the issue already reached a terminal status", async () => {
const { companyId, issueId, retryRunId } = await seedIssueWithRetry({ issueStatus: "done" });
const res = await request(createApp(boardActor(companyId)))
.post(`/api/issues/${issueId}/scheduled-retry/retry-now`)
.send({});
expect(res.status, JSON.stringify(res.body)).toBe(200);
expect(res.body).toMatchObject({
outcome: "gate_suppressed",
scheduledRetry: {
runId: retryRunId,
status: "cancelled",
errorCode: "issue_terminal_status",
},
});
});
});

View file

@ -13,6 +13,7 @@ const mockIssueService = vi.hoisted(() => ({
getComment: vi.fn(),
listBlockerAttention: vi.fn(),
listProductivityReviews: vi.fn(),
getCurrentScheduledRetry: vi.fn(),
listAttachments: vi.fn(),
}));
@ -188,6 +189,7 @@ describe.sequential("issue goal context routes", () => {
mockIssueService.getComment.mockResolvedValue(null);
mockIssueService.listBlockerAttention.mockResolvedValue(new Map());
mockIssueService.listProductivityReviews.mockResolvedValue(new Map());
mockIssueService.getCurrentScheduledRetry.mockResolvedValue(null);
mockIssueService.listAttachments.mockResolvedValue([]);
mockDocumentsService.getIssueDocumentPayload.mockResolvedValue({});
mockDocumentsService.getIssueDocumentByKey.mockResolvedValue(null);

View file

@ -1498,6 +1498,7 @@ export function issueRoutes(
relations,
blockerAttention,
productivityReview,
scheduledRetry,
attachments,
continuationSummary,
currentExecutionWorkspace,
@ -1510,6 +1511,7 @@ export function issueRoutes(
svc.getRelationSummaries(issue.id),
svc.listBlockerAttention(issue.companyId, [issue]).then((map) => map.get(issue.id) ?? null),
svc.listProductivityReviews(issue.companyId, [issue.id]).then((map) => map.get(issue.id) ?? null),
svc.getCurrentScheduledRetry(issue.id),
svc.listAttachments(issue.id),
documentsSvc.getIssueDocumentByKey(issue.id, ISSUE_CONTINUATION_SUMMARY_DOCUMENT_KEY),
currentExecutionWorkspacePromise,
@ -1525,6 +1527,7 @@ export function issueRoutes(
workMode: issue.workMode,
...(blockerAttention ? { blockerAttention } : {}),
productivityReview,
scheduledRetry,
priority: issue.priority,
projectId: issue.projectId,
goalId: goal?.id ?? issue.goalId,
@ -1606,6 +1609,7 @@ export function issueRoutes(
productivityReview,
referenceSummary,
successfulRunHandoffStates,
scheduledRetry,
] = await Promise.all([
resolveIssueProjectAndGoal(issue),
svc.getAncestors(issue.id),
@ -1616,6 +1620,7 @@ export function issueRoutes(
svc.listProductivityReviews(issue.companyId, [issue.id]).then((map) => map.get(issue.id) ?? null),
issueReferencesSvc.listIssueReferenceSummary(issue.id),
listSuccessfulRunHandoffStates(db, issue.companyId, [issue.id]),
svc.getCurrentScheduledRetry(issue.id),
]);
const mentionedProjects = mentionedProjectIds.length > 0
? await projectsSvc.listByIds(issue.companyId, mentionedProjectIds)
@ -1631,6 +1636,7 @@ export function issueRoutes(
...(blockerAttention ? { blockerAttention } : {}),
productivityReview,
successfulRunHandoff: successfulRunHandoffStates.get(issue.id) ?? null,
scheduledRetry,
blockedBy: relations.blockedBy,
blocks: relations.blocks,
relatedWork: referenceSummary,
@ -2438,6 +2444,44 @@ export function issueRoutes(
res.json({ ok: true });
});
router.post("/issues/:id/scheduled-retry/retry-now", async (req, res) => {
assertBoard(req);
const id = req.params.id as string;
const issue = await svc.getById(id);
if (!issue) {
res.status(404).json({ error: "Issue not found" });
return;
}
assertCompanyAccess(req, issue.companyId);
const actor = getActorInfo(req);
const result = await heartbeat.retryScheduledRetryNow({
issueId: issue.id,
actor: {
actorType: actor.actorType,
actorId: actor.actorId,
},
});
await logActivity(db, {
companyId: issue.companyId,
actorType: actor.actorType,
actorId: actor.actorId,
action: "issue.scheduled_retry_retry_now",
entityType: "issue",
entityId: issue.id,
agentId: result.scheduledRetry?.agentId ?? issue.assigneeAgentId ?? null,
runId: result.scheduledRetry?.runId ?? null,
details: {
outcome: result.outcome,
message: result.message,
scheduledRetry: result.scheduledRetry,
},
});
res.json(result);
});
router.patch("/issues/:id", validate(updateIssueRouteSchema), async (req, res) => {
const id = req.params.id as string;
const existing = await svc.getById(id);

View file

@ -4712,6 +4712,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
issueId: string | null;
details: Record<string, unknown>;
};
type BlockedScheduledRetryGate = Extract<ScheduledRetryGate, { allowed: false }>;
async function evaluateScheduledRetryGate(input: {
run: typeof heartbeatRuns.$inferSelect;
@ -4960,6 +4961,111 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
return cancelled;
}
async function promoteScheduledRetryRun(
dueRun: typeof heartbeatRuns.$inferSelect,
now: Date,
): Promise<
| { outcome: "promoted"; run: typeof heartbeatRuns.$inferSelect }
| {
outcome: "gate_suppressed";
run: typeof heartbeatRuns.$inferSelect;
reason: string;
errorCode: BlockedScheduledRetryGate["errorCode"];
}
| { outcome: "not_promoted"; run: typeof heartbeatRuns.$inferSelect | null }
> {
const agent = await getAgent(dueRun.agentId);
if (!agent) {
const gate = {
allowed: false as const,
reason: "Scheduled retry suppressed because the agent no longer exists",
errorCode: "agent_not_invokable" as const,
issueId: readNonEmptyString(parseObject(dueRun.contextSnapshot).issueId),
details: { agentId: dueRun.agentId },
};
const cancelled = await cancelScheduledRetryForGate(dueRun, gate, now);
return cancelled
? {
outcome: "gate_suppressed",
run: cancelled,
reason: gate.reason,
errorCode: gate.errorCode,
}
: { outcome: "not_promoted", run: null };
}
const contextSnapshot = parseObject(dueRun.contextSnapshot);
const gate = await evaluateScheduledRetryGate({
run: dueRun,
agent,
contextSnapshot,
retryReason: dueRun.scheduledRetryReason,
enforceIssueExecutionLock: dueRun.scheduledRetryReason === MAX_TURN_CONTINUATION_RETRY_REASON,
});
if (!gate.allowed) {
if (
gate.errorCode === "issue_not_found" &&
dueRun.scheduledRetryReason !== MAX_TURN_CONTINUATION_RETRY_REASON
) {
// Preserve legacy transient retry behavior for runs that only carry a
// loose task context rather than a persisted issue row.
} else {
const cancelled = await cancelScheduledRetryForGate(dueRun, gate, now);
return cancelled
? {
outcome: "gate_suppressed",
run: cancelled,
reason: gate.reason,
errorCode: gate.errorCode,
}
: { outcome: "not_promoted", run: null };
}
}
const promoted = await db
.update(heartbeatRuns)
.set({
status: "queued",
updatedAt: now,
})
.where(
and(
eq(heartbeatRuns.id, dueRun.id),
eq(heartbeatRuns.status, "scheduled_retry"),
lte(heartbeatRuns.scheduledRetryAt, now),
),
)
.returning()
.then((rows) => rows[0] ?? null);
if (!promoted) return { outcome: "not_promoted", run: null };
await appendRunEvent(promoted, await nextRunEventSeq(promoted.id), {
eventType: "lifecycle",
stream: "system",
level: "info",
message: "Scheduled retry became due and was promoted to the queued run pool",
payload: {
scheduledRetryAttempt: promoted.scheduledRetryAttempt,
scheduledRetryAt: promoted.scheduledRetryAt ? new Date(promoted.scheduledRetryAt).toISOString() : null,
scheduledRetryReason: promoted.scheduledRetryReason,
},
});
publishLiveEvent({
companyId: promoted.companyId,
type: "heartbeat.run.queued",
payload: {
runId: promoted.id,
agentId: promoted.agentId,
invocationSource: promoted.invocationSource,
triggerDetail: promoted.triggerDetail,
wakeupRequestId: promoted.wakeupRequestId,
},
});
return { outcome: "promoted", run: promoted };
}
async function scheduleBoundedRetryForRun(
run: typeof heartbeatRuns.$inferSelect,
agent: typeof agents.$inferSelect,
@ -5384,81 +5490,10 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
const promotedRunIds: string[] = [];
for (const dueRun of dueRuns) {
const agent = await getAgent(dueRun.agentId);
if (!agent) {
await cancelScheduledRetryForGate(dueRun, {
allowed: false,
reason: "Scheduled retry suppressed because the agent no longer exists",
errorCode: "agent_not_invokable",
issueId: readNonEmptyString(parseObject(dueRun.contextSnapshot).issueId),
details: { agentId: dueRun.agentId },
}, now);
continue;
const result = await promoteScheduledRetryRun(dueRun, now);
if (result.outcome === "promoted") {
promotedRunIds.push(result.run.id);
}
const contextSnapshot = parseObject(dueRun.contextSnapshot);
const gate = await evaluateScheduledRetryGate({
run: dueRun,
agent,
contextSnapshot,
retryReason: dueRun.scheduledRetryReason,
enforceIssueExecutionLock: dueRun.scheduledRetryReason === MAX_TURN_CONTINUATION_RETRY_REASON,
});
if (!gate.allowed) {
if (
gate.errorCode === "issue_not_found" &&
dueRun.scheduledRetryReason !== MAX_TURN_CONTINUATION_RETRY_REASON
) {
// Preserve legacy transient retry behavior for runs that only carry a
// loose task context rather than a persisted issue row.
} else {
await cancelScheduledRetryForGate(dueRun, gate, now);
continue;
}
}
const promoted = await db
.update(heartbeatRuns)
.set({
status: "queued",
updatedAt: now,
})
.where(
and(
eq(heartbeatRuns.id, dueRun.id),
eq(heartbeatRuns.status, "scheduled_retry"),
lte(heartbeatRuns.scheduledRetryAt, now),
),
)
.returning()
.then((rows) => rows[0] ?? null);
if (!promoted) continue;
promotedRunIds.push(promoted.id);
await appendRunEvent(promoted, await nextRunEventSeq(promoted.id), {
eventType: "lifecycle",
stream: "system",
level: "info",
message: "Scheduled retry became due and was promoted to the queued run pool",
payload: {
scheduledRetryAttempt: promoted.scheduledRetryAttempt,
scheduledRetryAt: promoted.scheduledRetryAt ? new Date(promoted.scheduledRetryAt).toISOString() : null,
scheduledRetryReason: promoted.scheduledRetryReason,
},
});
publishLiveEvent({
companyId: promoted.companyId,
type: "heartbeat.run.queued",
payload: {
runId: promoted.id,
agentId: promoted.agentId,
invocationSource: promoted.invocationSource,
triggerDetail: promoted.triggerDetail,
wakeupRequestId: promoted.wakeupRequestId,
},
});
}
return {
@ -5467,6 +5502,182 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
};
}
async function getIssueRetryRun(
companyId: string,
issueId: string,
statuses: Array<"scheduled_retry" | "queued" | "running" | "cancelled">,
) {
if (statuses.length === 0) return null;
return db
.select({
run: heartbeatRuns,
agentName: agents.name,
})
.from(heartbeatRuns)
.innerJoin(agents, eq(heartbeatRuns.agentId, agents.id))
.where(
and(
eq(heartbeatRuns.companyId, companyId),
inArray(heartbeatRuns.status, statuses),
sql`${heartbeatRuns.contextSnapshot} ->> 'issueId' = ${issueId}`,
sql`${heartbeatRuns.retryOfRunId} is not null`,
),
)
.orderBy(desc(heartbeatRuns.updatedAt), desc(heartbeatRuns.createdAt), desc(heartbeatRuns.id))
.limit(1)
.then((rows) => rows[0] ?? null);
}
function summarizeIssueScheduledRetryRun(
row: { run: typeof heartbeatRuns.$inferSelect; agentName: string | null },
) {
return {
runId: row.run.id,
status: row.run.status as "scheduled_retry" | "queued" | "running" | "cancelled",
agentId: row.run.agentId,
agentName: row.agentName,
retryOfRunId: row.run.retryOfRunId,
scheduledRetryAt: row.run.scheduledRetryAt,
scheduledRetryAttempt: row.run.scheduledRetryAttempt,
scheduledRetryReason: row.run.scheduledRetryReason,
error: row.run.error,
errorCode: row.run.errorCode,
};
}
async function retryScheduledRetryNow(input: {
issueId: string;
actor?: { actorType?: "user" | "agent" | "system"; actorId?: string | null };
now?: Date;
}) {
const now = input.now ?? new Date();
const issue = await db
.select({ id: issues.id, companyId: issues.companyId })
.from(issues)
.where(eq(issues.id, input.issueId))
.then((rows) => rows[0] ?? null);
if (!issue) throw notFound("Issue not found");
const scheduled = await getIssueRetryRun(issue.companyId, issue.id, ["scheduled_retry"]);
if (!scheduled) {
const alreadyPromoted = await getIssueRetryRun(issue.companyId, issue.id, ["queued", "running"]);
if (alreadyPromoted) {
return {
outcome: "already_promoted" as const,
message: "Scheduled retry was already promoted",
scheduledRetry: summarizeIssueScheduledRetryRun(alreadyPromoted),
};
}
return {
outcome: "no_scheduled_retry" as const,
message: "No live scheduled retry exists for this issue",
scheduledRetry: null,
};
}
const contextSnapshot = {
...parseObject(scheduled.run.contextSnapshot),
scheduledRetryAt: now.toISOString(),
retryNowRequestedAt: now.toISOString(),
retryNowRequestedByActorType: input.actor?.actorType ?? null,
retryNowRequestedByActorId: input.actor?.actorId ?? null,
};
const updated = await db.transaction(async (tx) => {
const row = await tx
.update(heartbeatRuns)
.set({
scheduledRetryAt: now,
contextSnapshot,
updatedAt: now,
})
.where(and(eq(heartbeatRuns.id, scheduled.run.id), eq(heartbeatRuns.status, "scheduled_retry")))
.returning()
.then((rows) => rows[0] ?? null);
if (!row) return null;
if (row.wakeupRequestId) {
const wakeupPayload = {
...(parseObject(
await tx
.select({ payload: agentWakeupRequests.payload })
.from(agentWakeupRequests)
.where(eq(agentWakeupRequests.id, row.wakeupRequestId))
.then((rows) => rows[0]?.payload ?? null),
)),
scheduledRetryAt: now.toISOString(),
retryNowRequestedAt: now.toISOString(),
};
await tx
.update(agentWakeupRequests)
.set({
payload: wakeupPayload,
updatedAt: now,
})
.where(eq(agentWakeupRequests.id, row.wakeupRequestId));
}
return row;
});
if (!updated) {
const alreadyPromoted = await getIssueRetryRun(issue.companyId, issue.id, ["queued", "running"]);
if (alreadyPromoted) {
return {
outcome: "already_promoted" as const,
message: "Scheduled retry was already promoted",
scheduledRetry: summarizeIssueScheduledRetryRun(alreadyPromoted),
};
}
return {
outcome: "no_scheduled_retry" as const,
message: "No live scheduled retry exists for this issue",
scheduledRetry: null,
};
}
await appendRunEvent(updated, await nextRunEventSeq(updated.id), {
eventType: "lifecycle",
stream: "system",
level: "info",
message: "Scheduled retry was requested to run now",
payload: {
issueId: issue.id,
scheduledRetryAttempt: updated.scheduledRetryAttempt,
scheduledRetryAt: updated.scheduledRetryAt ? new Date(updated.scheduledRetryAt).toISOString() : null,
scheduledRetryReason: updated.scheduledRetryReason,
requestedByActorType: input.actor?.actorType ?? null,
requestedByActorId: input.actor?.actorId ?? null,
},
});
const promotion = await promoteScheduledRetryRun(updated, now);
const promotedRow = await getIssueRetryRun(issue.companyId, issue.id, ["queued", "running", "cancelled"]);
const scheduledRetry = promotedRow
? summarizeIssueScheduledRetryRun(promotedRow)
: summarizeIssueScheduledRetryRun({ run: promotion.run ?? updated, agentName: scheduled.agentName });
if (promotion.outcome === "promoted") {
return {
outcome: "promoted" as const,
message: "Scheduled retry was promoted to the queued run pool",
scheduledRetry,
};
}
if (promotion.outcome === "gate_suppressed") {
return {
outcome: "gate_suppressed" as const,
message: promotion.reason,
scheduledRetry,
};
}
return {
outcome: "already_promoted" as const,
message: "Scheduled retry was already promoted",
scheduledRetry,
};
}
function parseHeartbeatPolicy(agent: typeof agents.$inferSelect) {
const runtimeConfig = parseObject(agent.runtimeConfig);
const heartbeat = parseObject(runtimeConfig.heartbeat);
@ -9383,6 +9594,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
reapOrphanedRuns,
promoteDueScheduledRetries,
retryScheduledRetryNow,
resumeQueuedRuns,

View file

@ -156,6 +156,19 @@ type IssueActiveRunRow = {
finishedAt: Date | null;
createdAt: Date;
};
type IssueScheduledRetryRow = {
runId: string;
status: "scheduled_retry" | "queued" | "running" | "cancelled";
agentId: string;
agentName: string | null;
retryOfRunId: string | null;
scheduledRetryAt: Date | null;
scheduledRetryAttempt: number;
scheduledRetryReason: string | null;
retryExhaustedReason?: string | null;
error?: string | null;
errorCode?: string | null;
};
type IssueWithLabels = IssueRow & { labels: IssueLabelRow[]; labelIds: string[] };
type IssueWithLabelsAndRun = IssueWithLabels & { activeRun: IssueActiveRunRow | null };
type IssueUserCommentStats = {
@ -1686,6 +1699,36 @@ export function issueService(db: Db) {
return enriched;
}
async function getCurrentScheduledRetryForIssue(issueId: string, companyId: string): Promise<IssueScheduledRetryRow | null> {
const row = await db
.select({
runId: heartbeatRuns.id,
status: heartbeatRuns.status,
agentId: heartbeatRuns.agentId,
agentName: agents.name,
retryOfRunId: heartbeatRuns.retryOfRunId,
scheduledRetryAt: heartbeatRuns.scheduledRetryAt,
scheduledRetryAttempt: heartbeatRuns.scheduledRetryAttempt,
scheduledRetryReason: heartbeatRuns.scheduledRetryReason,
error: heartbeatRuns.error,
errorCode: heartbeatRuns.errorCode,
})
.from(heartbeatRuns)
.innerJoin(agents, eq(heartbeatRuns.agentId, agents.id))
.where(
and(
eq(heartbeatRuns.companyId, companyId),
eq(heartbeatRuns.status, "scheduled_retry"),
sql`${heartbeatRuns.contextSnapshot} ->> 'issueId' = ${issueId}`,
),
)
.orderBy(asc(heartbeatRuns.scheduledRetryAt), asc(heartbeatRuns.createdAt), asc(heartbeatRuns.id))
.limit(1)
.then((rows) => rows[0] ?? null);
return row ? { ...row, status: "scheduled_retry" } : null;
}
function deriveIssueCommentAuthorType(comment: {
authorType?: string | null;
authorAgentId?: string | null;
@ -2502,6 +2545,16 @@ export function issueService(db: Db) {
return getIssueByIdentifier(identifier);
},
getCurrentScheduledRetry: async (issueId: string) => {
const issue = await db
.select({ id: issues.id, companyId: issues.companyId })
.from(issues)
.where(eq(issues.id, issueId))
.then((rows) => rows[0] ?? null);
if (!issue) throw notFound("Issue not found");
return getCurrentScheduledRetryForIssue(issue.id, issue.companyId);
},
getRelationSummaries: async (issueId: string) => {
const issue = await db
.select({ id: issues.id, companyId: issues.companyId })