[codex] Harden heartbeat scheduling and runtime controls (#4223)

## Thinking Path

> - Paperclip orchestrates AI agents through issue checkout, heartbeat
runs, routines, and auditable control-plane state
> - The runtime path has to recover from lost local processes, transient
adapter failures, blocked dependencies, and routine coalescing without
stranding work
> - The existing branch carried several reliability fixes across
heartbeat scheduling, issue runtime controls, routine dispatch, and
operator-facing run state
> - These changes belong together because they share backend contracts,
migrations, and runtime status semantics
> - This pull request groups the control-plane/runtime slice so it can
merge independently from board UI polish and adapter sandbox work
> - The benefit is safer heartbeat recovery, clearer runtime controls,
and more predictable recurring execution behavior

## What Changed

- Adds bounded heartbeat retry scheduling, scheduled retry state, and
Codex transient failure recovery handling.
- Tightens heartbeat process recovery, blocker wake behavior, issue
comment wake handling, routine dispatch coalescing, and
activity/dashboard bounds.
- Adds runtime-control MCP tools and Paperclip skill docs for issue
workspace runtime management.
- Adds migrations `0061_lively_thor_girl.sql` and
`0062_routine_run_dispatch_fingerprint.sql`.
- Surfaces retry state in run ledger/agent UI and keeps related shared
types synchronized.

## Verification

- `pnpm exec vitest run
server/src/__tests__/heartbeat-retry-scheduling.test.ts
server/src/__tests__/heartbeat-process-recovery.test.ts
server/src/__tests__/routines-service.test.ts`
- `pnpm exec vitest run src/tools.test.ts` from `packages/mcp-server`

## Risks

- Medium risk: this touches heartbeat recovery and routine dispatch,
which are central execution paths.
- Migration ordering: the new migrations must land before any code that
relies on them, so merge this PR before split branches that assume the new runtime/routine fields.
- Runtime retry behavior should be watched in CI and in local operator
smoke tests because it changes how transient failures are resumed.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, GPT-5-based coding agent runtime, shell/git tool use
enabled. Exact hosted model build and context window are not exposed in
this Paperclip heartbeat environment.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
This commit is contained in:
Dotta 2026-04-21 12:24:11 -05:00 committed by GitHub
parent ab9051b595
commit 09d0678840
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
61 changed files with 17622 additions and 456 deletions

View file

@ -21,6 +21,10 @@ const mockIssueService = vi.hoisted(() => ({
// Stub out the activity service module: `activityService()` hands back the shared mock,
// and `normalizeActivityLimit` is a stand-in that falls back to 100 for missing or
// non-finite input and otherwise floors the value and clamps it to the range 1..500.
vi.mock("../services/activity.js", () => {
  const fallbackLimit = 100;
  const maxLimit = 500;

  function normalizeActivityLimit(limit: number | undefined): number {
    if (typeof limit !== "number" || !Number.isFinite(limit)) {
      return fallbackLimit;
    }
    const whole = Math.floor(limit);
    if (whole < 1) return 1;
    return whole > maxLimit ? maxLimit : whole;
  }

  return {
    activityService: () => mockActivityService,
    normalizeActivityLimit,
  };
});
vi.mock("../services/index.js", () => ({
@ -58,6 +62,38 @@ describe("activity routes", () => {
vi.clearAllMocks();
});
it("limits company activity lists by default", async () => {
  // No `limit` query parameter is sent, so the service is expected to be asked for 100 rows.
  mockActivityService.list.mockResolvedValue([]);

  const response = await request(await createApp()).get("/api/companies/company-1/activity");

  expect(response.status).toBe(200);
  const expectedQuery = {
    companyId: "company-1",
    agentId: undefined,
    entityType: undefined,
    entityId: undefined,
    limit: 100,
  };
  expect(mockActivityService.list).toHaveBeenCalledWith(expectedQuery);
});
it("caps requested company activity list limits", async () => {
  // An oversized `limit=5000` is requested; the service should receive the 500 ceiling,
  // while the `entityType` filter is passed through untouched.
  mockActivityService.list.mockResolvedValue([]);

  const response = await request(await createApp()).get(
    "/api/companies/company-1/activity?limit=5000&entityType=issue",
  );

  expect(response.status).toBe(200);
  const expectedQuery = {
    companyId: "company-1",
    agentId: undefined,
    entityType: "issue",
    entityId: undefined,
    limit: 500,
  };
  expect(mockActivityService.list).toHaveBeenCalledWith(expectedQuery);
});
it("resolves issue identifiers before loading runs", async () => {
mockIssueService.getByIdentifier.mockResolvedValue({
id: "issue-uuid-1",

View file

@ -1,6 +1,7 @@
import { randomUUID } from "node:crypto";
import { afterAll, afterEach, beforeAll, describe, expect, it } from "vitest";
import {
activityLog,
agents,
companies,
createDb,
@ -56,6 +57,7 @@ describeEmbeddedPostgres("activity service", () => {
}, 20_000);
afterEach(async () => {
await db.delete(activityLog);
await db.delete(issueComments);
await db.delete(issueDocuments);
await db.delete(documentRevisions);
@ -70,6 +72,51 @@ describeEmbeddedPostgres("activity service", () => {
await tempDb?.cleanup();
});
it("limits company activity lists", async () => {
  const companyId = randomUUID();
  const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;
  await db.insert(companies).values({
    id: companyId,
    name: "Paperclip",
    issuePrefix,
    requireBoardApprovalForNewAgents: false,
  });

  // Three system events one hour apart, inserted oldest first.
  const timeline = [
    ["test.oldest", "2026-04-21T10:00:00.000Z"],
    ["test.middle", "2026-04-21T11:00:00.000Z"],
    ["test.newest", "2026-04-21T12:00:00.000Z"],
  ] as const;
  await db.insert(activityLog).values(
    timeline.map(([action, createdAtIso]) => ({
      companyId,
      actorType: "system" as const,
      actorId: "system",
      action,
      entityType: "company" as const,
      entityId: companyId,
      createdAt: new Date(createdAtIso),
    })),
  );

  // With limit 2 only the two most recent events should come back, newest first.
  const listed = await activityService(db).list({ companyId, limit: 2 });
  const actions = listed.map((event) => event.action);
  expect(actions).toEqual(["test.newest", "test.middle"]);
});
it("returns compact usage and result summaries for issue runs", async () => {
const companyId = randomUUID();
const agentId = randomUUID();

View file

@ -29,6 +29,15 @@ console.log(JSON.stringify({ type: "turn.completed", usage: { input_tokens: 1, c
await fs.chmod(commandPath, 0o755);
}
/**
 * Writes an executable Node script to `commandPath` that prints a single JSON
 * `error` event carrying `errorMessage` on stdout and then exits with status 1.
 *
 * @param commandPath - File path the script is written to (mode is set to 0o755).
 * @param errorMessage - Message embedded in the emitted event; JSON-encoded so quotes
 *   and other special characters remain a valid JavaScript string literal in the script.
 */
async function writeFailingCodexCommand(commandPath: string, errorMessage: string): Promise<void> {
  const encodedMessage = JSON.stringify(errorMessage);
  const scriptLines = [
    "#!/usr/bin/env node",
    `console.log(JSON.stringify({ type: "error", message: ${encodedMessage} }));`,
    "process.exit(1);",
    "", // keeps the trailing newline at the end of the file
  ];
  await fs.writeFile(commandPath, scriptLines.join("\n"), "utf8");
  await fs.chmod(commandPath, 0o755);
}
type CapturePayload = {
argv: string[];
prompt: string;
@ -369,6 +378,131 @@ describe("codex execute", () => {
}
});
// Runs `execute` against a fake `codex` command that prints one JSON error event and
// exits 1, then checks the result carries exit code 1, the `codex_transient_upstream`
// error code, and an error message mentioning "high demand".
it("classifies remote-compaction high-demand failures as retryable transient upstream errors", async () => {
// Everything lives under a throwaway temp root that is removed in `finally`.
const root = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-codex-execute-transient-"));
const workspace = path.join(root, "workspace");
const commandPath = path.join(root, "codex");
await fs.mkdir(workspace, { recursive: true });
await writeFailingCodexCommand(
commandPath,
"Error running remote compact task: We're currently experiencing high demand, which may cause temporary errors.",
);
// Point HOME at the temp root for the duration of the run; the prior value is restored below.
const previousHome = process.env.HOME;
process.env.HOME = root;
try {
const result = await execute({
runId: "run-transient-error",
agent: {
id: "agent-1",
companyId: "company-1",
name: "Codex Coder",
adapterType: "codex_local",
adapterConfig: {},
},
runtime: {
sessionId: null,
sessionParams: null,
sessionDisplayId: null,
taskKey: null,
},
config: {
command: commandPath,
cwd: workspace,
promptTemplate: "Follow the paperclip heartbeat.",
},
context: {},
authToken: "run-jwt-token",
onLog: async () => {},
});
expect(result.exitCode).toBe(1);
expect(result.errorCode).toBe("codex_transient_upstream");
expect(result.errorMessage).toContain("high demand");
} finally {
// Restore HOME exactly: delete it if it was unset before, otherwise put the old value back.
if (previousHome === undefined) delete process.env.HOME;
else process.env.HOME = previousHome;
await fs.rm(root, { recursive: true, force: true });
}
});
// Runs `execute` with `codexTransientFallbackMode: "fresh_session_safer_invocation"` while a
// stale session and `fastMode` are configured, then checks the captured invocation: no
// `resume`, no fast-mode arguments, a session-handoff prompt, and the two fallback command notes.
it("uses safer invocation settings and a fresh-session handoff for codex transient fallback retries", async () => {
const root = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-codex-execute-fallback-"));
const workspace = path.join(root, "workspace");
const commandPath = path.join(root, "codex");
// NOTE(review): presumably the fake command writes its argv/prompt to this path via
// PAPERCLIP_TEST_CAPTURE_PATH — the helper body is outside this view; confirm.
const capturePath = path.join(root, "capture.json");
await fs.mkdir(workspace, { recursive: true });
await writeFakeCodexCommand(commandPath);
// Point HOME at the temp root for the duration of the run; the prior value is restored below.
const previousHome = process.env.HOME;
process.env.HOME = root;
// Filled in by the `onMeta` callback so the notes can be asserted after the run.
let commandNotes: string[] = [];
try {
const result = await execute({
runId: "run-fallback",
agent: {
id: "agent-1",
companyId: "company-1",
name: "Codex Coder",
adapterType: "codex_local",
adapterConfig: {},
},
runtime: {
sessionId: null,
// Saved session params name a previous session; the assertions below expect it not to be resumed.
sessionParams: {
sessionId: "codex-session-stale",
cwd: workspace,
},
sessionDisplayId: "codex-session-stale",
taskKey: null,
},
config: {
command: commandPath,
cwd: workspace,
// Fast mode is requested in config; the assertions below expect it to be absent from argv.
fastMode: true,
model: "gpt-5.4",
env: {
PAPERCLIP_TEST_CAPTURE_PATH: capturePath,
},
promptTemplate: "Follow the paperclip heartbeat.",
},
context: {
codexTransientFallbackMode: "fresh_session_safer_invocation",
// The summary body is expected to appear verbatim in the prompt sent to the command.
paperclipContinuationSummary: {
key: "continuation-summary",
title: "Continuation Summary",
body: "Issue continuation summary for the next fresh session.",
updatedAt: "2026-04-21T01:00:00.000Z",
},
},
authToken: "run-jwt-token",
onLog: async () => {},
onMeta: async (meta) => {
commandNotes = meta.commandNotes ?? [];
},
});
expect(result.exitCode).toBe(0);
expect(result.errorMessage).toBeNull();
const capture = JSON.parse(await fs.readFile(capturePath, "utf8")) as CapturePayload;
expect(capture.argv).toEqual(expect.arrayContaining(["exec", "--json", "-"]));
expect(capture.argv).not.toContain("resume");
expect(capture.argv).not.toContain('service_tier="fast"');
expect(capture.argv).not.toContain("features.fast_mode=true");
expect(capture.prompt).toContain("Paperclip session handoff:");
expect(capture.prompt).toContain("Issue continuation summary for the next fresh session.");
expect(commandNotes).toContain("Codex transient fallback requested safer invocation settings for this retry.");
expect(commandNotes).toContain("Codex transient fallback forced a fresh session with a continuation handoff.");
} finally {
// Restore HOME exactly: delete it if it was unset before, otherwise put the old value back.
if (previousHome === undefined) delete process.env.HOME;
else process.env.HOME = previousHome;
await fs.rm(root, { recursive: true, force: true });
}
});
it("renders execution-stage wake instructions for reviewer and executor roles", async () => {
const root = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-codex-execute-stage-wake-"));
const workspace = path.join(root, "workspace");

View file

@ -5,7 +5,7 @@ import {
getEmbeddedPostgresTestSupport,
startEmbeddedPostgresTestDatabase,
} from "./helpers/embedded-postgres.js";
import { dashboardService } from "../services/dashboard.ts";
import { dashboardService, getUtcMonthStart } from "../services/dashboard.ts";
const embeddedPostgresSupport = await getEmbeddedPostgresTestSupport();
const describeEmbeddedPostgres = embeddedPostgresSupport.supported ? describe : describe.skip;
@ -26,6 +26,17 @@ function utcDateKey(date: Date): string {
return date.toISOString().slice(0, 10);
}
describe("getUtcMonthStart", () => {
  it("anchors the monthly spend window to UTC month boundaries", () => {
    // Each input carries a UTC offset that places the instant in a different UTC month
    // than its local calendar date; the expected value is that UTC month's first instant.
    const cases = [
      { input: "2026-03-31T20:30:00.000-05:00", expected: "2026-04-01T00:00:00.000Z" },
      { input: "2026-04-01T00:30:00.000+14:00", expected: "2026-03-01T00:00:00.000Z" },
    ];

    for (const { input, expected } of cases) {
      const monthStart = getUtcMonthStart(new Date(input));
      expect(monthStart.toISOString()).toBe(expected);
    }
  });
});
describeEmbeddedPostgres("dashboard service", () => {
let db!: ReturnType<typeof createDb>;
let tempDb: Awaited<ReturnType<typeof startEmbeddedPostgresTestDatabase>> | null = null;

View file

@ -538,6 +538,144 @@ describe("heartbeat comment wake batching", () => {
}
}, 120_000);
// Scenario: a first comment wake starts a run, a second comment wake arrives while that run
// is still in flight (and returns null), then the first run is cancelled. The test expects a
// second gateway payload whose wake section carries the queued comment.
it("promotes deferred comment wakes with their comments after the active run is cancelled", async () => {
// NOTE(review): the gateway helper is defined outside this view; from its use here it exposes
// `url`, `getAgentPayloads()`, `releaseFirstWait()` and `close()` — confirm its blocking semantics.
const gateway = await createControlledGatewayServer();
const companyId = randomUUID();
const agentId = randomUUID();
const issueId = randomUUID();
const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;
const heartbeat = heartbeatService(db);
try {
await db.insert(companies).values({
id: companyId,
name: "Paperclip",
issuePrefix,
requireBoardApprovalForNewAgents: false,
});
// The agent uses the `openclaw_gateway` adapter pointed at the test gateway's URL.
await db.insert(agents).values({
id: agentId,
companyId,
name: "Gateway Agent",
role: "engineer",
status: "idle",
adapterType: "openclaw_gateway",
adapterConfig: {
url: gateway.url,
headers: {
"x-openclaw-token": "gateway-token",
},
payloadTemplate: {
message: "wake now",
},
waitTimeoutMs: 2_000,
},
runtimeConfig: {},
permissions: {},
});
await db.insert(issues).values({
id: issueId,
companyId,
title: "Interrupt queued comment",
status: "todo",
priority: "medium",
assigneeAgentId: agentId,
issueNumber: 2,
identifier: `${issuePrefix}-2`,
});
// First comment + wake: this one is expected to produce a run.
const comment1 = await db
.insert(issueComments)
.values({
companyId,
issueId,
authorUserId: "user-1",
body: "Start work",
})
.returning()
.then((rows) => rows[0]);
const firstRun = await heartbeat.wakeup(agentId, {
source: "automation",
triggerDetail: "system",
reason: "issue_commented",
payload: { issueId, commentId: comment1.id },
contextSnapshot: {
issueId,
taskId: issueId,
commentId: comment1.id,
wakeReason: "issue_commented",
},
requestedByActorType: "user",
requestedByActorId: "user-1",
});
expect(firstRun).not.toBeNull();
// Wait until the gateway has received the first run's payload before queuing the follow-up.
await waitFor(() => gateway.getAgentPayloads().length === 1);
// Second comment + wake while the first run is still active: `wakeup` is expected to return null.
const queuedComment = await db
.insert(issueComments)
.values({
companyId,
issueId,
authorUserId: "user-1",
body: "Queued follow-up",
})
.returning()
.then((rows) => rows[0]);
const followupRun = await heartbeat.wakeup(agentId, {
source: "automation",
triggerDetail: "system",
reason: "issue_commented",
payload: { issueId, commentId: queuedComment.id },
contextSnapshot: {
issueId,
taskId: issueId,
commentId: queuedComment.id,
wakeReason: "issue_commented",
},
requestedByActorType: "user",
requestedByActorId: "user-1",
});
expect(followupRun).toBeNull();
// Cancelling the active run should lead to a second gateway payload for the deferred wake.
await heartbeat.cancelRun(firstRun!.id);
await waitFor(() => gateway.getAgentPayloads().length === 2);
const promotedPayload = gateway.getAgentPayloads()[1] ?? {};
// The promoted payload must reference only the queued comment, with its body included.
expect(promotedPayload.paperclip).toMatchObject({
wake: {
commentIds: [queuedComment.id],
latestCommentId: queuedComment.id,
comments: [
expect.objectContaining({
id: queuedComment.id,
body: "Queued follow-up",
}),
],
commentWindow: {
requestedCount: 1,
includedCount: 1,
missingCount: 0,
},
},
});
expect(String(promotedPayload.message ?? "")).toContain("Queued follow-up");
gateway.releaseFirstWait();
// Both runs must end up in a terminal state (`cancelled` or `succeeded`) within 90 seconds.
await waitFor(async () => {
const runs = await db.select().from(heartbeatRuns).where(eq(heartbeatRuns.agentId, agentId));
return runs.length === 2 && runs.every((run) => ["cancelled", "succeeded"].includes(run.status));
}, 90_000);
} finally {
// Release again on the failure path so the gateway is not left waiting before it is closed.
gateway.releaseFirstWait();
await gateway.close();
}
}, 120_000);
it("promotes deferred comment wakes after the active run closes the issue", async () => {
const gateway = await createControlledGatewayServer();
const companyId = randomUUID();

View file

@ -132,7 +132,7 @@ describeEmbeddedPostgres("heartbeat dependency-aware queued run selection", () =
await tempDb?.cleanup();
});
it("keeps blocked descendants queued until their blockers resolve", async () => {
it("keeps blocked descendants idle until their blockers resolve", async () => {
const companyId = randomUUID();
const agentId = randomUUID();
const blockerId = randomUUID();
@ -200,15 +200,72 @@ describeEmbeddedPostgres("heartbeat dependency-aware queued run selection", () =
payload: { issueId: blockedIssueId },
contextSnapshot: { issueId: blockedIssueId, wakeReason: "issue_assigned" },
});
expect(blockedWake).not.toBeNull();
expect(blockedWake).toBeNull();
const blockedWakeRequest = await waitForCondition(async () => {
const wakeup = await db
.select({
status: agentWakeupRequests.status,
reason: agentWakeupRequests.reason,
})
.from(agentWakeupRequests)
.where(
and(
eq(agentWakeupRequests.agentId, agentId),
sql`${agentWakeupRequests.payload} ->> 'issueId' = ${blockedIssueId}`,
),
)
.orderBy(agentWakeupRequests.requestedAt)
.then((rows) => rows[0] ?? null);
return Boolean(
wakeup &&
wakeup.status === "skipped" &&
wakeup.reason === "issue_dependencies_blocked",
);
});
expect(blockedWakeRequest).toBe(true);
const blockedRunsBeforeResolution = await db
.select({ count: sql<number>`count(*)::int` })
.from(heartbeatRuns)
.where(sql`${heartbeatRuns.contextSnapshot} ->> 'issueId' = ${blockedIssueId}`)
.then((rows) => rows[0]?.count ?? 0);
expect(blockedRunsBeforeResolution).toBe(0);
const interactionWake = await heartbeat.wakeup(agentId, {
source: "automation",
triggerDetail: "system",
reason: "issue_commented",
payload: { issueId: blockedIssueId, commentId: randomUUID() },
contextSnapshot: {
issueId: blockedIssueId,
wakeReason: "issue_commented",
},
});
expect(interactionWake).not.toBeNull();
await waitForCondition(async () => {
const run = await db
.select({ status: heartbeatRuns.status })
.from(heartbeatRuns)
.where(eq(heartbeatRuns.id, blockedWake!.id))
.where(eq(heartbeatRuns.id, interactionWake!.id))
.then((rows) => rows[0] ?? null);
return run?.status === "queued";
return run?.status === "succeeded";
});
const interactionRun = await db
.select({
status: heartbeatRuns.status,
contextSnapshot: heartbeatRuns.contextSnapshot,
})
.from(heartbeatRuns)
.where(eq(heartbeatRuns.id, interactionWake!.id))
.then((rows) => rows[0] ?? null);
expect(interactionRun?.status).toBe("succeeded");
expect(interactionRun?.contextSnapshot).toMatchObject({
dependencyBlockedInteraction: true,
unresolvedBlockerIssueIds: [blockerId],
});
const readyWake = await heartbeat.wakeup(agentId, {
@ -229,12 +286,12 @@ describeEmbeddedPostgres("heartbeat dependency-aware queued run selection", () =
return run?.status === "succeeded";
});
const [blockedRun, readyRun] = await Promise.all([
db.select().from(heartbeatRuns).where(eq(heartbeatRuns.id, blockedWake!.id)).then((rows) => rows[0] ?? null),
db.select().from(heartbeatRuns).where(eq(heartbeatRuns.id, readyWake!.id)).then((rows) => rows[0] ?? null),
]);
const readyRun = await db
.select()
.from(heartbeatRuns)
.where(eq(heartbeatRuns.id, readyWake!.id))
.then((rows) => rows[0] ?? null);
expect(blockedRun?.status).toBe("queued");
expect(readyRun?.status).toBe("succeeded");
await db
@ -242,7 +299,7 @@ describeEmbeddedPostgres("heartbeat dependency-aware queued run selection", () =
.set({ status: "done", updatedAt: new Date() })
.where(eq(issues.id, blockerId));
await heartbeat.wakeup(agentId, {
const promotedWake = await heartbeat.wakeup(agentId, {
source: "automation",
triggerDetail: "system",
reason: "issue_blockers_resolved",
@ -253,12 +310,13 @@ describeEmbeddedPostgres("heartbeat dependency-aware queued run selection", () =
resolvedBlockerIssueId: blockerId,
},
});
expect(promotedWake).not.toBeNull();
await waitForCondition(async () => {
const run = await db
.select({ status: heartbeatRuns.status })
.from(heartbeatRuns)
.where(eq(heartbeatRuns.id, blockedWake!.id))
.where(eq(heartbeatRuns.id, promotedWake!.id))
.then((rows) => rows[0] ?? null);
return run?.status === "succeeded";
});
@ -269,7 +327,7 @@ describeEmbeddedPostgres("heartbeat dependency-aware queued run selection", () =
status: heartbeatRuns.status,
})
.from(heartbeatRuns)
.where(eq(heartbeatRuns.id, blockedWake!.id))
.where(eq(heartbeatRuns.id, promotedWake!.id))
.then((rows) => rows[0] ?? null);
const blockedWakeRequestCount = await db
.select({ count: sql<number>`count(*)::int` })

View file

@ -16,6 +16,7 @@ import {
heartbeatRuns,
issueComments,
issueDocuments,
issueRelations,
issues,
} from "@paperclipai/db";
import {
@ -231,6 +232,7 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
await db.delete(issueDocuments);
await db.delete(documentRevisions);
await db.delete(documents);
await db.delete(issueRelations);
await db.delete(issues);
await db.delete(heartbeatRunEvents);
await db.delete(heartbeatRuns);
@ -441,6 +443,87 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
return { companyId, agentId, runId, wakeupRequestId, issueId };
}
// Seeds a company, an idle `codex_local` agent, a queued wakeup request, a queued heartbeat
// run linked to that request, and an in-progress issue whose checkout/execution run is that run.
// Returns the generated ids so tests can look the rows up afterwards.
async function seedQueuedIssueRunFixture() {
const companyId = randomUUID();
const agentId = randomUUID();
const runId = randomUUID();
const wakeupRequestId = randomUUID();
const issueId = randomUUID();
// Fixed timestamp used for every created/updated/requested/started column below.
const now = new Date("2026-03-19T00:00:00.000Z");
const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;
await db.insert(companies).values({
id: companyId,
name: "Paperclip",
issuePrefix,
requireBoardApprovalForNewAgents: false,
});
await db.insert(agents).values({
id: agentId,
companyId,
name: "CodexCoder",
role: "engineer",
status: "idle",
adapterType: "codex_local",
adapterConfig: {},
runtimeConfig: {
heartbeat: {
wakeOnDemand: true,
maxConcurrentRuns: 1,
},
},
permissions: {},
});
// The wakeup request and the run reference each other through `runId` / `wakeupRequestId`.
await db.insert(agentWakeupRequests).values({
id: wakeupRequestId,
companyId,
agentId,
source: "assignment",
triggerDetail: "system",
reason: "issue_assigned",
payload: { issueId },
status: "queued",
runId,
requestedAt: now,
updatedAt: now,
});
await db.insert(heartbeatRuns).values({
id: runId,
companyId,
agentId,
invocationSource: "assignment",
triggerDetail: "system",
status: "queued",
wakeupRequestId,
contextSnapshot: {
issueId,
taskId: issueId,
wakeReason: "issue_assigned",
},
updatedAt: now,
createdAt: now,
});
// The issue is already in progress and points at the queued run for both checkout and execution.
await db.insert(issues).values({
id: issueId,
companyId,
title: "Retry transient Codex failure without blocking",
status: "in_progress",
priority: "medium",
assigneeAgentId: agentId,
checkoutRunId: runId,
executionRunId: runId,
issueNumber: 1,
identifier: `${issuePrefix}-1`,
startedAt: now,
});
return { companyId, agentId, runId, wakeupRequestId, issueId };
}
it("keeps a local run active when the recorded pid is still alive", async () => {
const child = spawnAliveProcess();
childProcesses.add(child);
@ -547,8 +630,11 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
expect(issue?.executionRunId).toBe(retryRun?.id ?? null);
});
it("does not queue a second retry after the first process-loss retry was already used", async () => {
it("blocks the issue when process-loss retry is exhausted and the immediate continuation recovery also fails", async () => {
mockAdapterExecute.mockRejectedValueOnce(new Error("continuation recovery failed"));
const { agentId, runId, issueId } = await seedRunFixture({
agentStatus: "idle",
processPid: 999_999_999,
processLossRetryCount: 1,
});
@ -562,16 +648,74 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
.select()
.from(heartbeatRuns)
.where(eq(heartbeatRuns.agentId, agentId));
expect(runs).toHaveLength(1);
expect(runs[0]?.status).toBe("failed");
expect(runs).toHaveLength(2);
expect(runs.find((row) => row.id === runId)?.status).toBe("failed");
const continuationRun = runs.find((row) => row.id !== runId);
expect(continuationRun?.contextSnapshot as Record<string, unknown> | undefined).toMatchObject({
retryReason: "issue_continuation_needed",
retryOfRunId: runId,
});
const blockedIssue = await waitForValue(async () =>
db.select().from(issues).where(eq(issues.id, issueId)).then((rows) => {
const issue = rows[0] ?? null;
return issue?.status === "blocked" ? issue : null;
})
);
expect(blockedIssue?.status).toBe("blocked");
expect(blockedIssue?.executionRunId).toBeNull();
expect(blockedIssue?.checkoutRunId).toBe(continuationRun?.id ?? null);
const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId));
expect(comments).toHaveLength(1);
expect(comments[0]?.body).toContain("retried continuation");
});
it("schedules a bounded retry for codex transient upstream failures instead of blocking the issue immediately", async () => {
mockAdapterExecute.mockResolvedValueOnce({
exitCode: 1,
signal: null,
timedOut: false,
errorCode: "codex_transient_upstream",
errorMessage:
"Error running remote compact task: We're currently experiencing high demand, which may cause temporary errors.",
provider: "openai",
model: "gpt-5.4",
});
const { agentId, runId, issueId } = await seedQueuedIssueRunFixture();
const heartbeat = heartbeatService(db);
await heartbeat.resumeQueuedRuns();
await waitForRunToSettle(heartbeat, runId);
const runs = await waitForValue(async () => {
const rows = await db
.select()
.from(heartbeatRuns)
.where(eq(heartbeatRuns.agentId, agentId));
return rows.length >= 2 ? rows : null;
});
expect(runs).toHaveLength(2);
const failedRun = runs?.find((row) => row.id === runId);
const retryRun = runs?.find((row) => row.id !== runId);
expect(failedRun?.status).toBe("failed");
expect(failedRun?.errorCode).toBe("codex_transient_upstream");
expect(retryRun?.status).toBe("scheduled_retry");
expect(retryRun?.scheduledRetryReason).toBe("transient_failure");
expect((retryRun?.contextSnapshot as Record<string, unknown> | null)?.codexTransientFallbackMode).toBe("same_session");
const issue = await db
.select()
.from(issues)
.where(eq(issues.id, issueId))
.then((rows) => rows[0] ?? null);
expect(issue?.executionRunId).toBeNull();
expect(issue?.checkoutRunId).toBe(runId);
expect(issue?.status).toBe("in_progress");
expect(issue?.executionRunId).toBe(retryRun?.id ?? null);
const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId));
expect(comments).toHaveLength(0);
});
it("clears the detached warning when the run reports activity again", async () => {
@ -675,6 +819,107 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
expect(comments[0]?.body).toContain("Latest retry failure: `process_lost` - run failed before issue advanced.");
});
// Scenario: an unassigned `todo` issue created by one agent blocks another agent's issue.
// After `reconcileStrandedAssignedIssues()` the blocker is expected to be assigned to its
// creator, commented on, and to have an `issue_assigned` wakeup request for the creator.
it("assigns open unassigned blockers back to their creator agent", async () => {
const companyId = randomUUID();
const creatorAgentId = randomUUID();
const blockedAssigneeAgentId = randomUUID();
const blockerIssueId = randomUUID();
const blockedIssueId = randomUUID();
const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;
await db.insert(companies).values({
id: companyId,
name: "Paperclip",
issuePrefix,
requireBoardApprovalForNewAgents: false,
});
// Two idle agents: the blocker's creator and the assignee of the blocked issue.
await db.insert(agents).values([
{
id: creatorAgentId,
companyId,
name: "SecurityEngineer",
role: "engineer",
status: "idle",
adapterType: "codex_local",
adapterConfig: {},
runtimeConfig: {},
permissions: {},
},
{
id: blockedAssigneeAgentId,
companyId,
name: "CodexCoder",
role: "engineer",
status: "idle",
adapterType: "codex_local",
adapterConfig: {},
runtimeConfig: {},
permissions: {},
},
]);
// The blocker has a creator but no `assigneeAgentId`; the blocked issue has an assignee.
await db.insert(issues).values([
{
id: blockerIssueId,
companyId,
title: "Fix blocker",
status: "todo",
priority: "high",
createdByAgentId: creatorAgentId,
issueNumber: 1,
identifier: `${issuePrefix}-1`,
},
{
id: blockedIssueId,
companyId,
title: "Blocked work",
status: "blocked",
priority: "high",
assigneeAgentId: blockedAssigneeAgentId,
issueNumber: 2,
identifier: `${issuePrefix}-2`,
},
]);
// Relation row: `issueId` (the blocker) "blocks" `relatedIssueId` (the blocked issue).
await db.insert(issueRelations).values({
companyId,
issueId: blockerIssueId,
relatedIssueId: blockedIssueId,
type: "blocks",
createdByAgentId: creatorAgentId,
});
const heartbeat = heartbeatService(db);
const result = await heartbeat.reconcileStrandedAssignedIssues();
expect(result.orphanBlockersAssigned).toBe(1);
expect(result.issueIds).toContain(blockerIssueId);
const blocker = await db
.select()
.from(issues)
.where(eq(issues.id, blockerIssueId))
.then((rows) => rows[0] ?? null);
expect(blocker?.assigneeAgentId).toBe(creatorAgentId);
// The recovery comment on the blocker must link to the blocked issue by identifier.
const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, blockerIssueId));
expect(comments[0]?.body).toContain("Assigned Orphan Blocker");
expect(comments[0]?.body).toContain(`[${issuePrefix}-2](/${issuePrefix}/issues/${issuePrefix}-2)`);
// Exactly one wakeup request is expected for the creator, tagged with the recovery mutation.
const wakeups = await db.select().from(agentWakeupRequests).where(eq(agentWakeupRequests.agentId, creatorAgentId));
expect(wakeups).toEqual([
expect.objectContaining({
reason: "issue_assigned",
payload: expect.objectContaining({
issueId: blockerIssueId,
mutation: "unassigned_blocker_recovery",
}),
}),
]);
// If the wakeup already spawned a run, wait for it to settle before the test returns.
const runId = wakeups[0]?.runId;
if (runId) {
await waitForRunToSettle(heartbeat, runId);
}
});
it("re-enqueues continuation for stranded in-progress work with no active run", async () => {
const { agentId, issueId, runId } = await seedStrandedIssueFixture({
status: "in_progress",
@ -851,7 +1096,6 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
const wakes = await db.select().from(agentWakeupRequests).where(eq(agentWakeupRequests.agentId, agentId));
expect(wakes.some((row) => row.reason === "run_liveness_continuation")).toBe(false);
});
it("blocks stranded in-progress work after the continuation retry was already used", async () => {
const { issueId } = await seedStrandedIssueFixture({
status: "in_progress",

View file

@ -0,0 +1,338 @@
import { randomUUID } from "node:crypto";
import { eq, sql } from "drizzle-orm";
import { afterAll, afterEach, beforeAll, describe, expect, it } from "vitest";
import {
agents,
agentWakeupRequests,
companies,
createDb,
heartbeatRunEvents,
heartbeatRuns,
} from "@paperclipai/db";
import {
getEmbeddedPostgresTestSupport,
startEmbeddedPostgresTestDatabase,
} from "./helpers/embedded-postgres.js";
import {
BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS,
heartbeatService,
} from "../services/heartbeat.ts";
// Probe once at module load whether embedded Postgres can run on this host.
const embeddedPostgresSupport = await getEmbeddedPostgresTestSupport();
// Use `describe.skip` when unsupported so the suite is skipped rather than run.
const describeEmbeddedPostgres = embeddedPostgresSupport.supported ? describe : describe.skip;
if (!embeddedPostgresSupport.supported) {
// Surface the reason for skipping (or a generic fallback) in the test output.
console.warn(
`Skipping embedded Postgres heartbeat retry scheduling tests on this host: ${embeddedPostgresSupport.reason ?? "unsupported environment"}`,
);
}
describeEmbeddedPostgres("heartbeat bounded retry scheduling", () => {
let db!: ReturnType<typeof createDb>;
let heartbeat!: ReturnType<typeof heartbeatService>;
let tempDb: Awaited<ReturnType<typeof startEmbeddedPostgresTestDatabase>> | null = null;
// Start one temporary embedded Postgres database for the suite and build the db handle
// and heartbeat service on top of it (20 second hook timeout).
beforeAll(async () => {
tempDb = await startEmbeddedPostgresTestDatabase("paperclip-heartbeat-retry-scheduling-");
db = createDb(tempDb.connectionString);
heartbeat = heartbeatService(db);
}, 20_000);
// Empty every table the tests write to after each test.
// NOTE(review): the order (run events, runs, wakeup requests, agents, companies) presumably
// follows foreign-key dependencies — confirm against the schema before reordering.
afterEach(async () => {
await db.delete(heartbeatRunEvents);
await db.delete(heartbeatRuns);
await db.delete(agentWakeupRequests);
await db.delete(agents);
await db.delete(companies);
});
// Dispose of the temporary database; a no-op if `beforeAll` never assigned `tempDb`.
afterAll(async () => {
await tempDb?.cleanup();
});
/**
 * Seeds one company, one active `codex_local` agent, and one already-failed heartbeat run
 * using the caller-supplied ids, timestamp, and error code.
 *
 * When `scheduledRetryAttempt` is omitted the run is stored with attempt 0; the retry
 * reason is `"transient_failure"` only for a truthy attempt and `null` otherwise.
 */
async function seedRetryFixture(input: {
  runId: string;
  companyId: string;
  agentId: string;
  now: Date;
  errorCode: string;
  scheduledRetryAttempt?: number;
}) {
  const { runId, companyId, agentId, now, errorCode, scheduledRetryAttempt } = input;
  const compactCompanyId = companyId.replace(/-/g, "");
  const issuePrefix = `T${compactCompanyId.slice(0, 6).toUpperCase()}`;

  await db.insert(companies).values({
    id: companyId,
    name: "Paperclip",
    issuePrefix,
    requireBoardApprovalForNewAgents: false,
  });

  await db.insert(agents).values({
    id: agentId,
    companyId,
    name: "CodexCoder",
    role: "engineer",
    status: "active",
    adapterType: "codex_local",
    adapterConfig: {},
    runtimeConfig: {
      heartbeat: { wakeOnDemand: true, maxConcurrentRuns: 1 },
    },
    permissions: {},
  });

  // The source run is inserted in its final `failed` state, finished at `now`.
  const hasPriorAttempt = Boolean(scheduledRetryAttempt);
  await db.insert(heartbeatRuns).values({
    id: runId,
    companyId,
    agentId,
    invocationSource: "assignment",
    status: "failed",
    error: "upstream overload",
    errorCode,
    finishedAt: now,
    scheduledRetryAttempt: scheduledRetryAttempt ?? 0,
    scheduledRetryReason: hasPriorAttempt ? "transient_failure" : null,
    contextSnapshot: {
      issueId: randomUUID(),
      wakeReason: "issue_assigned",
    },
    updatedAt: now,
    createdAt: now,
  });
}
// Verifies the happy path of bounded retry scheduling: a failed run produces a
// `scheduled_retry` row carrying durable retry metadata, that row is left alone
// while its due time is still in the future, and it is promoted to `queued`
// once the due time is reached.
it("schedules a retry with durable metadata and only promotes it when due", async () => {
  const companyId = randomUUID();
  const agentId = randomUUID();
  const sourceRunId = randomUUID();
  const now = new Date("2026-04-20T12:00:00.000Z");

  // Reuse the shared fixture (company + codex_local agent + failed source run
  // with no prior retry attempts) instead of repeating the three inserts here.
  await seedRetryFixture({
    runId: sourceRunId,
    companyId,
    agentId,
    now,
    errorCode: "adapter_failed",
  });

  // `random` is pinned so the computed due time is deterministic.
  const scheduled = await heartbeat.scheduleBoundedRetry(sourceRunId, {
    now,
    random: () => 0.5,
  });
  expect(scheduled.outcome).toBe("scheduled");
  if (scheduled.outcome !== "scheduled") return;

  const expectedDueAt = new Date(now.getTime() + BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS[0]);
  expect(scheduled.attempt).toBe(1);
  expect(scheduled.dueAt.toISOString()).toBe(expectedDueAt.toISOString());

  // Single lookup helper for the retry row; it is re-read after every promotion pass.
  const retryRunId = scheduled.run.id;
  const loadRetryRun = () =>
    db
      .select()
      .from(heartbeatRuns)
      .where(eq(heartbeatRuns.id, retryRunId))
      .then((rows) => rows[0] ?? null);

  const retryRun = await loadRetryRun();
  expect(retryRun).toMatchObject({
    status: "scheduled_retry",
    retryOfRunId: sourceRunId,
    scheduledRetryAttempt: 1,
    scheduledRetryReason: "transient_failure",
  });
  expect(retryRun?.scheduledRetryAt?.toISOString()).toBe(expectedDueAt.toISOString());

  // One second before the due time. Derived from `expectedDueAt` so the test
  // does not silently depend on the configured length of the first delay.
  const justBeforeDue = new Date(expectedDueAt.getTime() - 1_000);
  const earlyPromotion = await heartbeat.promoteDueScheduledRetries(justBeforeDue);
  expect(earlyPromotion).toEqual({ promoted: 0, runIds: [] });
  const stillScheduled = await loadRetryRun();
  expect(stillScheduled?.status).toBe("scheduled_retry");

  // Exactly at the due time the retry must be promoted.
  const duePromotion = await heartbeat.promoteDueScheduledRetries(expectedDueAt);
  expect(duePromotion).toEqual({ promoted: 1, runIds: [retryRunId] });
  const promotedRun = await loadRetryRun();
  expect(promotedRun?.status).toBe("queued");
});
// A run that has already used every configured retry delay must not get another
// retry row; instead an exhaustion lifecycle event is recorded on the run.
it("exhausts bounded retries after the hard cap", async () => {
  const maxAttempts = BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS.length;
  const companyId = randomUUID();
  const agentId = randomUUID();
  const cappedRunId = randomUUID();
  const now = new Date("2026-04-20T18:00:00.000Z");
  const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;

  await db.insert(companies).values({
    id: companyId,
    name: "Paperclip",
    issuePrefix,
    requireBoardApprovalForNewAgents: false,
  });
  await db.insert(agents).values({
    id: agentId,
    companyId,
    name: "CodexCoder",
    role: "engineer",
    status: "active",
    adapterType: "codex_local",
    adapterConfig: {},
    runtimeConfig: {
      heartbeat: {
        wakeOnDemand: true,
        maxConcurrentRuns: 1,
      },
    },
    permissions: {},
  });
  // The failed run already sits at the last allowed attempt number.
  await db.insert(heartbeatRuns).values({
    id: cappedRunId,
    companyId,
    agentId,
    invocationSource: "automation",
    status: "failed",
    error: "still transient",
    errorCode: "adapter_failed",
    finishedAt: now,
    scheduledRetryAttempt: maxAttempts,
    scheduledRetryReason: "transient_failure",
    contextSnapshot: {
      wakeReason: "transient_failure_retry",
    },
    updatedAt: now,
    createdAt: now,
  });

  const exhausted = await heartbeat.scheduleBoundedRetry(cappedRunId, {
    now,
    random: () => 0.5,
  });
  expect(exhausted).toEqual({
    outcome: "retry_exhausted",
    attempt: maxAttempts + 1,
    maxAttempts,
  });

  // No additional run row may have been created for this company.
  const countRows = await db
    .select({ count: sql<number>`count(*)::int` })
    .from(heartbeatRuns)
    .where(eq(heartbeatRuns.companyId, companyId));
  const runCount = countRows[0]?.count ?? 0;
  expect(runCount).toBe(1);

  // Newest event on the capped run should be the exhaustion record.
  const eventRows = await db
    .select({
      message: heartbeatRunEvents.message,
      payload: heartbeatRunEvents.payload,
    })
    .from(heartbeatRunEvents)
    .where(eq(heartbeatRunEvents.runId, cappedRunId))
    .orderBy(sql`${heartbeatRunEvents.id} desc`);
  const exhaustionEvent = eventRows[0] ?? null;
  expect(exhaustionEvent?.message).toContain("Bounded retry exhausted");
  expect(exhaustionEvent?.payload).toMatchObject({
    retryReason: "transient_failure",
    scheduledRetryAttempt: maxAttempts,
    maxAttempts,
  });
});
// For each prior attempt count 0..3, scheduling the next bounded retry of a
// `codex_transient_upstream` failure must stamp the expected fallback mode on
// both the retry run's context snapshot and its wakeup request payload.
it("advances codex transient fallback stages across bounded retry attempts", async () => {
  const fallbackModes = [
    "same_session",
    "safer_invocation",
    "fresh_session",
    "fresh_session_safer_invocation",
  ] as const;

  for (const [index, expectedMode] of fallbackModes.entries()) {
    const companyId = randomUUID();
    const agentId = randomUUID();
    const runId = randomUUID();
    // Distinct hour per iteration (10:00, 11:00, ...); valid while index stays single-digit.
    const now = new Date(`2026-04-20T1${index}:00:00.000Z`);

    await seedRetryFixture({
      runId,
      companyId,
      agentId,
      now,
      errorCode: "codex_transient_upstream",
      scheduledRetryAttempt: index,
    });

    const scheduled = await heartbeat.scheduleBoundedRetry(runId, {
      now,
      random: () => 0.5,
    });
    expect(scheduled.outcome).toBe("scheduled");
    if (scheduled.outcome !== "scheduled") continue;

    const retryRun = await db
      .select({
        contextSnapshot: heartbeatRuns.contextSnapshot,
        wakeupRequestId: heartbeatRuns.wakeupRequestId,
      })
      .from(heartbeatRuns)
      .where(eq(heartbeatRuns.id, scheduled.run.id))
      .then((rows) => rows[0] ?? null);
    expect((retryRun?.contextSnapshot as Record<string, unknown> | null)?.codexTransientFallbackMode).toBe(expectedMode);

    // Fail fast with a clear message instead of querying with an empty-string
    // id, which would hide the real problem behind a lookup failure.
    const wakeupRequestId = retryRun?.wakeupRequestId;
    if (!wakeupRequestId) {
      throw new Error(`retry run for fallback mode "${expectedMode}" has no wakeupRequestId`);
    }

    const wakeupRequest = await db
      .select({ payload: agentWakeupRequests.payload })
      .from(agentWakeupRequests)
      .where(eq(agentWakeupRequests.id, wakeupRequestId))
      .then((rows) => rows[0] ?? null);
    expect((wakeupRequest?.payload as Record<string, unknown> | null)?.codexTransientFallbackMode).toBe(expectedMode);

    // Reset the tables between iterations; each pass seeds a fresh company,
    // agent and run, so leftovers are cleared before the next pass.
    await db.delete(heartbeatRunEvents);
    await db.delete(heartbeatRuns);
    await db.delete(agentWakeupRequests);
    await db.delete(agents);
    await db.delete(companies);
  }
});
});

View file

@ -7,6 +7,7 @@ const mockIssueService = vi.hoisted(() => ({
assertCheckoutOwner: vi.fn(),
update: vi.fn(),
addComment: vi.fn(),
getDependencyReadiness: vi.fn(),
findMentionedAgents: vi.fn(),
listWakeableBlockedDependents: vi.fn(),
getWakeableParentAfterChildCompletion: vi.fn(),
@ -199,6 +200,7 @@ describe("issue comment reopen routes", () => {
mockIssueService.assertCheckoutOwner.mockReset();
mockIssueService.update.mockReset();
mockIssueService.addComment.mockReset();
mockIssueService.getDependencyReadiness.mockReset();
mockIssueService.findMentionedAgents.mockReset();
mockIssueService.listWakeableBlockedDependents.mockReset();
mockIssueService.getWakeableParentAfterChildCompletion.mockReset();
@ -255,6 +257,14 @@ describe("issue comment reopen routes", () => {
authorUserId: "local-board",
});
mockIssueService.findMentionedAgents.mockResolvedValue([]);
mockIssueService.getDependencyReadiness.mockResolvedValue({
issueId: "11111111-1111-4111-8111-111111111111",
blockerIssueIds: [],
unresolvedBlockerIssueIds: [],
unresolvedBlockerCount: 0,
allBlockersDone: true,
isDependencyReady: true,
});
mockIssueService.listWakeableBlockedDependents.mockResolvedValue([]);
mockIssueService.getWakeableParentAfterChildCompletion.mockResolvedValue(null);
mockIssueService.assertCheckoutOwner.mockResolvedValue({ adoptedFromRunId: null });
@ -442,6 +452,75 @@ describe("issue comment reopen routes", () => {
);
});
// POSTing a comment on an assigned, blocked issue should flip it to "todo" and
// wake the assignee with an "issue_reopened_via_comment" wakeup.
it("moves assigned blocked issues back to todo via POST comments", async () => {
  const issueId = "11111111-1111-4111-8111-111111111111";
  const assigneeAgentId = "22222222-2222-4222-8222-222222222222";

  mockIssueService.getById.mockResolvedValue(makeIssue("blocked"));
  // Echo the patch back on top of a blocked issue.
  mockIssueService.update.mockImplementation(async (_id: string, patch: Record<string, unknown>) => {
    const base = makeIssue("blocked");
    return { ...base, ...patch };
  });

  const app = await installActor(createApp());
  const res = await request(app)
    .post("/api/issues/11111111-1111-4111-8111-111111111111/comments")
    .send({ body: "please continue" });

  expect(res.status).toBe(201);
  expect(mockIssueService.update).toHaveBeenCalledWith(issueId, { status: "todo" });

  const expectedWakeup = expect.objectContaining({
    reason: "issue_reopened_via_comment",
    payload: expect.objectContaining({
      commentId: "comment-1",
      reopenedFrom: "blocked",
      mutation: "comment",
    }),
    contextSnapshot: expect.objectContaining({
      issueId,
      wakeCommentId: "comment-1",
      wakeReason: "issue_reopened_via_comment",
      reopenedFrom: "blocked",
    }),
  });
  expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith(assigneeAgentId, expectedWakeup);
});
// When the blocked issue still has an unresolved blocker, a POSTed comment must
// not change its status; the assignee is woken with a plain "issue_commented".
it("does not move dependency-blocked issues to todo via POST comments", async () => {
  const issueId = "11111111-1111-4111-8111-111111111111";
  const assigneeAgentId = "22222222-2222-4222-8222-222222222222";
  const blockerIssueId = "33333333-3333-4333-8333-333333333333";

  mockIssueService.getById.mockResolvedValue(makeIssue("blocked"));
  mockIssueService.getDependencyReadiness.mockResolvedValue({
    issueId,
    blockerIssueIds: [blockerIssueId],
    unresolvedBlockerIssueIds: [blockerIssueId],
    unresolvedBlockerCount: 1,
    allBlockersDone: false,
    isDependencyReady: false,
  });

  const app = await installActor(createApp());
  const res = await request(app)
    .post("/api/issues/11111111-1111-4111-8111-111111111111/comments")
    .send({ body: "what is happening?" });

  expect(res.status).toBe(201);
  expect(mockIssueService.update).not.toHaveBeenCalled();

  const expectedWakeup = expect.objectContaining({
    reason: "issue_commented",
    payload: expect.objectContaining({
      commentId: "comment-1",
      mutation: "comment",
    }),
    contextSnapshot: expect.objectContaining({
      issueId,
      wakeCommentId: "comment-1",
      wakeReason: "issue_commented",
    }),
  });
  expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith(assigneeAgentId, expectedWakeup);
});
it("does not implicitly reopen closed issues via POST comments when no agent is assigned", async () => {
mockIssueService.getById.mockResolvedValue({
...makeIssue("done"),
@ -457,6 +536,82 @@ describe("issue comment reopen routes", () => {
expect(mockIssueService.update).not.toHaveBeenCalled();
});
// Same scenario as the POST comment route, but the comment arrives through
// PATCH /api/issues/:id; the update must carry status "todo" plus actor fields.
it("moves assigned blocked issues back to todo via the PATCH comment path", async () => {
  const issueId = "11111111-1111-4111-8111-111111111111";
  const assigneeAgentId = "22222222-2222-4222-8222-222222222222";

  mockIssueService.getById.mockResolvedValue(makeIssue("blocked"));
  // Echo the patch back on top of a blocked issue.
  mockIssueService.update.mockImplementation(async (_id: string, patch: Record<string, unknown>) => {
    const base = makeIssue("blocked");
    return { ...base, ...patch };
  });

  const app = await installActor(createApp());
  const res = await request(app)
    .patch("/api/issues/11111111-1111-4111-8111-111111111111")
    .send({ comment: "please continue" });

  expect(res.status).toBe(200);

  const expectedPatch = expect.objectContaining({
    status: "todo",
    actorAgentId: null,
    actorUserId: "local-board",
  });
  expect(mockIssueService.update).toHaveBeenCalledWith(issueId, expectedPatch);

  const expectedWakeup = expect.objectContaining({
    reason: "issue_reopened_via_comment",
    payload: expect.objectContaining({
      commentId: "comment-1",
      reopenedFrom: "blocked",
      mutation: "comment",
    }),
  });
  expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith(assigneeAgentId, expectedWakeup);
});
// With an unresolved blocker, a PATCH carrying only a comment may still call
// update (actor fields), but never with status "todo"; the wakeup is a plain
// "issue_commented".
it("does not move dependency-blocked issues to todo via the PATCH comment path", async () => {
  const issueId = "11111111-1111-4111-8111-111111111111";
  const assigneeAgentId = "22222222-2222-4222-8222-222222222222";
  const blockerIssueId = "33333333-3333-4333-8333-333333333333";

  mockIssueService.getById.mockResolvedValue(makeIssue("blocked"));
  mockIssueService.getDependencyReadiness.mockResolvedValue({
    issueId,
    blockerIssueIds: [blockerIssueId],
    unresolvedBlockerIssueIds: [blockerIssueId],
    unresolvedBlockerCount: 1,
    allBlockersDone: false,
    isDependencyReady: false,
  });
  // Echo the patch back on top of a blocked issue.
  mockIssueService.update.mockImplementation(async (_id: string, patch: Record<string, unknown>) => {
    const base = makeIssue("blocked");
    return { ...base, ...patch };
  });

  const app = await installActor(createApp());
  const res = await request(app)
    .patch("/api/issues/11111111-1111-4111-8111-111111111111")
    .send({ comment: "what is happening?" });

  expect(res.status).toBe(200);

  const actorOnlyPatch = expect.objectContaining({
    actorAgentId: null,
    actorUserId: "local-board",
  });
  expect(mockIssueService.update).toHaveBeenCalledWith(issueId, actorOnlyPatch);

  const todoPatch = expect.objectContaining({ status: "todo" });
  expect(mockIssueService.update).not.toHaveBeenCalledWith(issueId, todoPatch);

  const expectedWakeup = expect.objectContaining({
    reason: "issue_commented",
    payload: expect.objectContaining({
      commentId: "comment-1",
      mutation: "comment",
    }),
  });
  expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith(assigneeAgentId, expectedWakeup);
});
it("wakes the assignee when an assigned blocked issue moves back to todo", async () => {
const issue = makeIssue("blocked");
mockIssueService.getById.mockResolvedValue(issue);

View file

@ -27,6 +27,10 @@ const mockDocumentsService = vi.hoisted(() => ({
getIssueDocumentByKey: vi.fn(),
}));
// Shared execution-workspace service mock, created via vi.hoisted so individual
// tests can stub `getById` on the same instance the mocked services module returns.
const mockExecutionWorkspaceService = vi.hoisted(() => {
  return { getById: vi.fn() };
});
vi.mock("../services/index.js", () => ({
accessService: () => ({
canUser: vi.fn(),
@ -36,9 +40,7 @@ vi.mock("../services/index.js", () => ({
getById: vi.fn(),
}),
documentService: () => mockDocumentsService,
executionWorkspaceService: () => ({
getById: vi.fn(),
}),
executionWorkspaceService: () => mockExecutionWorkspaceService,
feedbackService: () => ({
listIssueVotesForUser: vi.fn(async () => []),
saveIssueVote: vi.fn(async () => ({ vote: null, consentEnabledNow: false, sharingEnabled: false })),
@ -157,6 +159,7 @@ describe("issue goal context routes", () => {
mockIssueService.listAttachments.mockResolvedValue([]);
mockDocumentsService.getIssueDocumentPayload.mockResolvedValue({});
mockDocumentsService.getIssueDocumentByKey.mockResolvedValue(null);
mockExecutionWorkspaceService.getById.mockResolvedValue(null);
mockProjectService.getById.mockResolvedValue({
id: legacyProjectLinkedIssue.projectId,
companyId: "company-1",
@ -285,4 +288,44 @@ describe("issue goal context routes", () => {
}),
]);
});
// The heartbeat-context payload should include the execution workspace linked
// to the issue, looked up by the issue's executionWorkspaceId.
it("surfaces the current execution workspace from GET /issues/:id/heartbeat-context", async () => {
  const workspaceId = "55555555-5555-4555-8555-555555555555";
  const serviceUrl = "http://127.0.0.1:5173";

  mockIssueService.getById.mockResolvedValue({
    ...legacyProjectLinkedIssue,
    executionWorkspaceId: workspaceId,
  });
  mockExecutionWorkspaceService.getById.mockResolvedValue({
    id: workspaceId,
    name: "PAP-581 workspace",
    mode: "isolated_workspace",
    status: "active",
    cwd: "/tmp/pap-581",
    runtimeServices: [
      {
        id: "service-1",
        serviceName: "web",
        status: "running",
        url: serviceUrl,
        healthStatus: "healthy",
      },
    ],
  });

  const app = await createApp();
  const res = await request(app).get(
    "/api/issues/11111111-1111-4111-8111-111111111111/heartbeat-context",
  );

  expect(res.status).toBe(200);
  expect(mockExecutionWorkspaceService.getById).toHaveBeenCalledWith(workspaceId);

  const expectedWorkspace = expect.objectContaining({
    id: workspaceId,
    mode: "isolated_workspace",
    runtimeServices: [
      expect.objectContaining({
        serviceName: "web",
        url: serviceUrl,
      }),
    ],
  });
  expect(res.body.currentExecutionWorkspace).toEqual(expectedWorkspace);
});
});

View file

@ -469,6 +469,88 @@ describeEmbeddedPostgres("issueService.list participantAgentId", () => {
expect(result.map((issue) => issue.id)).toEqual([linkedIssueId]);
});
// `workspaceId` is a generic filter: given an execution workspace id it matches
// issues linked to that execution workspace; given a project workspace id it
// matches every issue linked to that project workspace.
it("filters issues by generic workspace id across execution and project workspace links", async () => {
  const companyId = randomUUID();
  const projectId = randomUUID();
  const projectWorkspaceId = randomUUID();
  const executionWorkspaceId = randomUUID();
  const executionLinkedIssueId = randomUUID();
  const projectLinkedIssueId = randomUUID();
  const otherIssueId = randomUUID();
  const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;

  await db.insert(companies).values({
    id: companyId,
    name: "Paperclip",
    issuePrefix,
    requireBoardApprovalForNewAgents: false,
  });
  await db.insert(projects).values({
    id: projectId,
    companyId,
    name: "Workspace project",
    status: "in_progress",
  });
  await db.insert(projectWorkspaces).values({
    id: projectWorkspaceId,
    companyId,
    projectId,
    name: "Feature workspace",
    sourceType: "local_path",
    visibility: "default",
    isPrimary: false,
  });
  await db.insert(executionWorkspaces).values({
    id: executionWorkspaceId,
    companyId,
    projectId,
    projectWorkspaceId,
    mode: "isolated_workspace",
    strategyType: "git_worktree",
    name: "Execution workspace",
    status: "active",
    providerType: "git_worktree",
  });

  // Three issues: linked to both workspaces, linked to the project workspace
  // only, and linked to neither.
  await db.insert(issues).values([
    {
      id: executionLinkedIssueId,
      companyId,
      projectId,
      projectWorkspaceId,
      title: "Execution linked issue",
      status: "done",
      priority: "medium",
      executionWorkspaceId,
    },
    {
      id: projectLinkedIssueId,
      companyId,
      projectId,
      projectWorkspaceId,
      title: "Project linked issue",
      status: "todo",
      priority: "medium",
    },
    {
      id: otherIssueId,
      companyId,
      projectId,
      title: "Other issue",
      status: "todo",
      priority: "medium",
    },
  ]);

  const executionResult = await svc.list(companyId, { workspaceId: executionWorkspaceId });
  const projectResult = await svc.list(companyId, { workspaceId: projectWorkspaceId });

  const executionIds = executionResult.map((row) => row.id);
  expect(executionIds).toEqual([executionLinkedIssueId]);

  // Order is not asserted for the project workspace result, so compare sorted ids.
  const projectIds = projectResult.map((row) => row.id).sort();
  const expectedProjectIds = [executionLinkedIssueId, projectLinkedIssueId].sort();
  expect(projectIds).toEqual(expectedProjectIds);
});
it("hides archived inbox issues until new external activity arrives", async () => {
const companyId = randomUUID();
const userId = "user-1";
@ -740,6 +822,33 @@ describeEmbeddedPostgres("issueService.list participantAgentId", () => {
expect(result?.executionState).toBeNull();
expect(result?.executionWorkspaceSettings).toBeNull();
});
// Places an em dash as the 1200th character of the description and checks the
// listed description is exactly 1200 characters long and still ends with that
// dash, i.e. it was not cut through.
it("does not let description preview truncation split multibyte characters", async () => {
  const companyId = randomUUID();
  const issueId = randomUUID();
  const filler = "x".repeat(1199);
  const description = `${filler}— still valid after truncation`;
  const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;

  await db.insert(companies).values({
    id: companyId,
    name: "Paperclip",
    issuePrefix,
    requireBoardApprovalForNewAgents: false,
  });
  await db.insert(issues).values({
    id: issueId,
    companyId,
    title: "Multibyte boundary issue",
    description,
    status: "todo",
    priority: "medium",
  });

  const [result] = await svc.list(companyId);
  const preview = result?.description;

  expect(preview).toHaveLength(1200);
  expect(preview?.endsWith("—")).toBe(true);
});
});
describeEmbeddedPostgres("issueService.create workspace inheritance", () => {

View file

@ -349,6 +349,60 @@ describeEmbeddedPostgres("routine service live-execution coalescing", () => {
expect(routineIssues[0]?.id).toBe(previousIssue.id);
});
// Two manual runs of a coalescing routine with different `branch` values must
// each create their own issue, with distinct titles and origin fingerprints.
it("does not coalesce live routine runs with different resolved variables", async () => {
  const { companyId, agentId, projectId, svc } = await seedFixture();

  const routineInput = {
    projectId,
    goalId: null,
    parentIssueId: null,
    title: "pre-pr for {{branch}}",
    description: "Create a pre-PR from {{branch}}",
    assigneeAgentId: agentId,
    priority: "medium",
    status: "active",
    concurrencyPolicy: "coalesce_if_active",
    catchUpPolicy: "skip_missed",
    variables: [
      { name: "branch", label: null, type: "text", defaultValue: null, required: true, options: [] },
    ],
  } as const satisfies Record<string, unknown>;
  const variableRoutine = await svc.create(
    companyId,
    {
      ...routineInput,
      variables: routineInput.variables.map((variable) => ({ ...variable, options: [...variable.options] })),
    },
    {},
  );

  const first = await svc.runRoutine(variableRoutine.id, {
    source: "manual",
    variables: { branch: "feature/a" },
  });
  const second = await svc.runRoutine(variableRoutine.id, {
    source: "manual",
    variables: { branch: "feature/b" },
  });

  for (const run of [first, second]) {
    expect(run.status).toBe("issue_created");
  }
  for (const run of [first, second]) {
    expect(run.linkedIssueId).toBeTruthy();
  }
  expect(first.linkedIssueId).not.toBe(second.linkedIssueId);

  const routineIssues = await db
    .select({
      id: issues.id,
      title: issues.title,
      originFingerprint: issues.originFingerprint,
    })
    .from(issues)
    .where(eq(issues.originId, variableRoutine.id));

  expect(routineIssues).toHaveLength(2);

  const sortedTitles = routineIssues.map((row) => row.title).sort();
  expect(sortedTitles).toEqual([
    "pre-pr for feature/a",
    "pre-pr for feature/b",
  ]);

  const distinctFingerprints = new Set(routineIssues.map((row) => row.originFingerprint));
  expect(distinctFingerprints.size).toBe(2);
});
it("interpolates routine variables into the execution issue and stores resolved values", async () => {
const { companyId, agentId, projectId, svc } = await seedFixture();
const variableRoutine = await svc.create(

View file

@ -118,6 +118,7 @@ vi.mock("../services/index.js", () => ({
feedbackService: feedbackServiceFactoryMock,
heartbeatService: vi.fn(() => ({
reapOrphanedRuns: vi.fn(async () => undefined),
promoteDueScheduledRetries: vi.fn(async () => ({ promoted: 0, runIds: [] })),
resumeQueuedRuns: vi.fn(async () => undefined),
reconcileStrandedAssignedIssues: vi.fn(async () => ({
dispatchRequeued: 0,

View file

@ -663,15 +663,20 @@ export async function startServer(): Promise<StartedServer> {
// then resume any persisted queued runs that were waiting on the previous process.
void heartbeat
.reapOrphanedRuns()
.then(() => heartbeat.resumeQueuedRuns())
.then(async () => {
.then(() => heartbeat.promoteDueScheduledRetries())
.then(async (promotion) => {
await heartbeat.resumeQueuedRuns();
const reconciled = await heartbeat.reconcileStrandedAssignedIssues();
if (
promotion.promoted > 0 ||
reconciled.dispatchRequeued > 0 ||
reconciled.continuationRequeued > 0 ||
reconciled.escalated > 0
) {
logger.warn({ ...reconciled }, "startup stranded-issue reconciliation changed assigned issue state");
logger.warn(
{ promotedScheduledRetries: promotion.promoted, promotedScheduledRetryRunIds: promotion.runIds, ...reconciled },
"startup heartbeat recovery changed assigned issue state",
);
}
})
.then(async () => {
@ -710,15 +715,20 @@ export async function startServer(): Promise<StartedServer> {
// persisted queued work is still being driven forward.
void heartbeat
.reapOrphanedRuns({ staleThresholdMs: 5 * 60 * 1000 })
.then(() => heartbeat.resumeQueuedRuns())
.then(async () => {
.then(() => heartbeat.promoteDueScheduledRetries())
.then(async (promotion) => {
await heartbeat.resumeQueuedRuns();
const reconciled = await heartbeat.reconcileStrandedAssignedIssues();
if (
promotion.promoted > 0 ||
reconciled.dispatchRequeued > 0 ||
reconciled.continuationRequeued > 0 ||
reconciled.escalated > 0
) {
logger.warn({ ...reconciled }, "periodic stranded-issue reconciliation changed assigned issue state");
logger.warn(
{ promotedScheduledRetries: promotion.promoted, promotedScheduledRetryRunIds: promotion.runIds, ...reconciled },
"periodic heartbeat recovery changed assigned issue state",
);
}
})
.then(async () => {

View file

@ -2,7 +2,7 @@ import { Router } from "express";
import { z } from "zod";
import type { Db } from "@paperclipai/db";
import { validate } from "../middleware/validate.js";
import { activityService } from "../services/activity.js";
import { activityService, normalizeActivityLimit } from "../services/activity.js";
import { assertAuthenticated, assertBoard, assertCompanyAccess } from "./authz.js";
import { heartbeatService, issueService } from "../services/index.js";
import { sanitizeRecord } from "../redaction.js";
@ -39,6 +39,7 @@ export function activityRoutes(db: Db) {
agentId: req.query.agentId as string | undefined,
entityType: req.query.entityType as string | undefined,
entityId: req.query.entityId as string | undefined,
limit: normalizeActivityLimit(Number(req.query.limit)),
};
const result = await svc.list(filters);
res.json(result);

View file

@ -2155,7 +2155,6 @@ export function agentRoutes(db: Db) {
res.status(409).json({ error: "Only pending approval agents can be approved" });
return;
}
const approval = await svc.activatePendingApproval(id);
if (!approval) {
res.status(404).json({ error: "Agent not found" });
@ -2515,7 +2514,13 @@ export function agentRoutes(db: Db) {
return;
}
assertCompanyAccess(req, run.companyId);
res.json(redactCurrentUserValue(run, await getCurrentUserRedactionOptions()));
const retryExhaustedReason = await heartbeat.getRetryExhaustedReason(runId);
res.json(
redactCurrentUserValue(
{ ...run, retryExhaustedReason },
await getCurrentUserRedactionOptions(),
),
);
});
router.post("/heartbeat-runs/:runId/cancel", async (req, res) => {

View file

@ -173,13 +173,13 @@ function isClosedIssueStatus(status: string | null | undefined): status is "done
return status === "done" || status === "cancelled";
}
function shouldImplicitlyReopenCommentForAgent(input: {
function shouldImplicitlyMoveCommentedIssueToTodoForAgent(input: {
issueStatus: string | null | undefined;
assigneeAgentId: string | null | undefined;
actorType: "agent" | "user";
actorId: string;
}) {
if (!isClosedIssueStatus(input.issueStatus)) return false;
if (!isClosedIssueStatus(input.issueStatus) && input.issueStatus !== "blocked") return false;
if (typeof input.assigneeAgentId !== "string" || input.assigneeAgentId.length === 0) return false;
if (input.actorType === "agent" && input.actorId === input.assigneeAgentId) return false;
return true;
@ -721,6 +721,7 @@ export function issueRoutes(
inboxArchivedByUserId,
unreadForUserId,
projectId: req.query.projectId as string | undefined,
workspaceId: req.query.workspaceId as string | undefined,
executionWorkspaceId: req.query.executionWorkspaceId as string | undefined,
parentId: req.query.parentId as string | undefined,
labelId: req.query.labelId as string | undefined,
@ -804,16 +805,29 @@ export function issueRoutes(
? req.query.wakeCommentId.trim()
: null;
const [{ project, goal }, ancestors, commentCursor, wakeComment, relations, attachments, continuationSummary] =
const currentExecutionWorkspacePromise = issue.executionWorkspaceId
? executionWorkspacesSvc.getById(issue.executionWorkspaceId)
: Promise.resolve(null);
const [
{ project, goal },
ancestors,
commentCursor,
wakeComment,
relations,
attachments,
continuationSummary,
currentExecutionWorkspace,
] =
await Promise.all([
resolveIssueProjectAndGoal(issue),
svc.getAncestors(issue.id),
svc.getCommentCursor(issue.id),
wakeCommentId ? svc.getComment(wakeCommentId) : null,
svc.getRelationSummaries(issue.id),
svc.listAttachments(issue.id),
documentsSvc.getIssueDocumentByKey(issue.id, ISSUE_CONTINUATION_SUMMARY_DOCUMENT_KEY),
]);
resolveIssueProjectAndGoal(issue),
svc.getAncestors(issue.id),
svc.getCommentCursor(issue.id),
wakeCommentId ? svc.getComment(wakeCommentId) : null,
svc.getRelationSummaries(issue.id),
svc.listAttachments(issue.id),
documentsSvc.getIssueDocumentByKey(issue.id, ISSUE_CONTINUATION_SUMMARY_DOCUMENT_KEY),
currentExecutionWorkspacePromise,
]);
res.json({
issue: {
@ -879,6 +893,7 @@ export function issueRoutes(
updatedAt: continuationSummary.updatedAt,
}
: null,
currentExecutionWorkspace,
});
});
@ -1590,6 +1605,7 @@ export function issueRoutes(
const actor = getActorInfo(req);
const isClosed = isClosedIssueStatus(existing.status);
const isBlocked = existing.status === "blocked";
const normalizedAssigneeAgentId = await normalizeIssueAssigneeAgentReference(
existing.companyId,
req.body.assigneeAgentId as string | null | undefined,
@ -1608,10 +1624,10 @@ export function issueRoutes(
} = req.body;
const requestedAssigneeAgentId =
normalizedAssigneeAgentId === undefined ? existing.assigneeAgentId : normalizedAssigneeAgentId;
const effectiveReopenRequested =
const effectiveMoveToTodoRequested =
reopenRequested ||
(!!commentBody &&
shouldImplicitlyReopenCommentForAgent({
shouldImplicitlyMoveCommentedIssueToTodoForAgent({
issueStatus: existing.status,
assigneeAgentId: requestedAssigneeAgentId,
actorType: actor.actorType,
@ -1620,6 +1636,10 @@ export function issueRoutes(
const updateReferenceSummaryBefore = titleOrDescriptionChanged
? await issueReferencesSvc.listIssueReferenceSummary(existing.id)
: null;
const hasUnresolvedFirstClassBlockers =
isBlocked && effectiveMoveToTodoRequested
? (await svc.getDependencyReadiness(existing.id)).unresolvedBlockerCount > 0
: false;
let interruptedRunId: string | null = null;
const closedExecutionWorkspace = await getClosedIssueExecutionWorkspace(existing);
const isAgentWorkUpdate = req.actor.type === "agent" && Object.keys(updateFields).length > 0;
@ -1662,7 +1682,12 @@ export function issueRoutes(
if (hiddenAtRaw !== undefined) {
updateFields.hiddenAt = hiddenAtRaw ? new Date(hiddenAtRaw) : null;
}
if (commentBody && effectiveReopenRequested && isClosed && updateFields.status === undefined) {
if (
commentBody &&
effectiveMoveToTodoRequested &&
(isClosed || (isBlocked && !hasUnresolvedFirstClassBlockers)) &&
updateFields.status === undefined
) {
updateFields.status = "todo";
}
if (req.body.executionPolicy !== undefined) {
@ -1836,8 +1861,8 @@ export function issueRoutes(
const hasFieldChanges = Object.keys(previous).length > 0;
const reopened =
commentBody &&
effectiveReopenRequested &&
isClosed &&
effectiveMoveToTodoRequested &&
(isClosed || (isBlocked && !hasUnresolvedFirstClassBlockers)) &&
previous.status !== undefined &&
issue.status === "todo";
const reopenFromStatus = reopened ? existing.status : null;
@ -2025,7 +2050,7 @@ export function issueRoutes(
const statusChangedFromBlockedToTodo =
existing.status === "blocked" &&
issue.status === "todo" &&
req.body.status !== undefined;
(req.body.status !== undefined || reopened);
const previousExecutionState = parseIssueExecutionState(existing.executionState);
const nextExecutionState = parseIssueExecutionState(issue.executionState);
const executionStageWakeup = buildExecutionStageWakeup({
@ -2596,21 +2621,26 @@ export function issueRoutes(
const reopenRequested = req.body.reopen === true;
const interruptRequested = req.body.interrupt === true;
const isClosed = isClosedIssueStatus(issue.status);
const effectiveReopenRequested =
const isBlocked = issue.status === "blocked";
const effectiveMoveToTodoRequested =
reopenRequested ||
shouldImplicitlyReopenCommentForAgent({
shouldImplicitlyMoveCommentedIssueToTodoForAgent({
issueStatus: issue.status,
assigneeAgentId: issue.assigneeAgentId,
actorType: actor.actorType,
actorId: actor.actorId,
});
const hasUnresolvedFirstClassBlockers =
isBlocked && effectiveMoveToTodoRequested
? (await svc.getDependencyReadiness(issue.id)).unresolvedBlockerCount > 0
: false;
let reopened = false;
let reopenFromStatus: string | null = null;
let interruptedRunId: string | null = null;
let currentIssue = issue;
const commentReferenceSummaryBefore = await issueReferencesSvc.listIssueReferenceSummary(issue.id);
if (effectiveReopenRequested && isClosed) {
if (effectiveMoveToTodoRequested && (isClosed || (isBlocked && !hasUnresolvedFirstClassBlockers))) {
const reopenedIssue = await svc.update(id, { status: "todo" });
if (!reopenedIssue) {
res.status(404).json({ error: "Issue not found" });

View file

@ -1,4 +1,4 @@
import { and, desc, eq, isNull, or, sql } from "drizzle-orm";
import { and, asc, desc, eq, inArray, isNull, or, sql } from "drizzle-orm";
import type { Db } from "@paperclipai/db";
import {
activityLog,
@ -21,6 +21,15 @@ export interface ActivityFilters {
agentId?: string;
entityType?: string;
entityId?: string;
limit?: number;
}
/** Page size used when the caller gives no usable limit. */
const DEFAULT_ACTIVITY_LIMIT = 100;
/** Upper bound on the page size, regardless of what the caller asks for. */
const MAX_ACTIVITY_LIMIT = 500;

/**
 * Turns a caller-supplied activity limit into a safe page size.
 *
 * @param limit Requested limit; may be undefined, NaN or infinite.
 * @returns The default limit when `limit` is not a finite number; otherwise the
 *   value rounded down and clamped to the range 1..MAX_ACTIVITY_LIMIT.
 */
export function normalizeActivityLimit(limit: number | undefined): number {
  if (limit === undefined || !Number.isFinite(limit)) {
    return DEFAULT_ACTIVITY_LIMIT;
  }
  const whole = Math.floor(limit);
  if (whole < 1) return 1;
  return whole > MAX_ACTIVITY_LIMIT ? MAX_ACTIVITY_LIMIT : whole;
}
export function activityService(db: Db) {
@ -316,6 +325,7 @@ export function activityService(db: Db) {
return {
list: (filters: ActivityFilters) => {
const conditions = [eq(activityLog.companyId, filters.companyId)];
const limit = normalizeActivityLimit(filters.limit);
if (filters.agentId) {
conditions.push(eq(activityLog.agentId, filters.agentId));
@ -347,6 +357,7 @@ export function activityService(db: Db) {
),
)
.orderBy(desc(activityLog.createdAt))
.limit(limit)
.then((rows) => rows.map((r) => r.activityLog));
},
@ -364,7 +375,7 @@ export function activityService(db: Db) {
runsForIssue: async (companyId: string, issueId: string) => {
scheduleRunLivenessBackfill(companyId, issueId);
return db
const runs = await db
.select({
runId: heartbeatRuns.id,
status: heartbeatRuns.status,
@ -377,6 +388,10 @@ export function activityService(db: Db) {
usageJson: summarizedUsageJson,
resultJson: summarizedResultJson,
logBytes: heartbeatRuns.logBytes,
retryOfRunId: heartbeatRuns.retryOfRunId,
scheduledRetryAt: heartbeatRuns.scheduledRetryAt,
scheduledRetryAttempt: heartbeatRuns.scheduledRetryAttempt,
scheduledRetryReason: heartbeatRuns.scheduledRetryReason,
livenessState: heartbeatRuns.livenessState,
livenessReason: heartbeatRuns.livenessReason,
continuationAttempt: heartbeatRuns.continuationAttempt,
@ -408,6 +423,34 @@ export function activityService(db: Db) {
),
)
.orderBy(desc(heartbeatRuns.createdAt));
if (runs.length === 0) return runs;
const exhaustionRows = await db
.select({
runId: heartbeatRunEvents.runId,
message: heartbeatRunEvents.message,
})
.from(heartbeatRunEvents)
.where(
and(
inArray(heartbeatRunEvents.runId, runs.map((run) => run.runId)),
eq(heartbeatRunEvents.eventType, "lifecycle"),
sql`${heartbeatRunEvents.message} like 'Bounded retry exhausted%'`,
),
)
.orderBy(asc(heartbeatRunEvents.runId), desc(heartbeatRunEvents.id));
const retryExhaustedReasonByRunId = new Map<string, string>();
for (const row of exhaustionRows) {
if (!row.message || retryExhaustedReasonByRunId.has(row.runId)) continue;
retryExhaustedReasonByRunId.set(row.runId, row.message);
}
return runs.map((run) => ({
...run,
retryExhaustedReason: retryExhaustedReasonByRunId.get(run.runId) ?? null,
}));
},
issuesForRun: async (runId: string) => {

View file

@ -10,6 +10,10 @@ function formatUtcDateKey(date: Date): string {
return date.toISOString().slice(0, 10);
}
/**
 * Returns midnight UTC on the first day of the month that contains `date`.
 *
 * @param date - Any instant; only its UTC year and month are used.
 * @returns A new `Date`; the input is not modified.
 */
export function getUtcMonthStart(date: Date): Date {
  const utcYear = date.getUTCFullYear();
  const utcMonth = date.getUTCMonth();
  const firstOfMonthMs = Date.UTC(utcYear, utcMonth, 1);
  return new Date(firstOfMonthMs);
}
function getRecentUtcDateKeys(now: Date, days: number): string[] {
const todayUtc = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());
return Array.from({ length: days }, (_, index) => {
@ -76,7 +80,7 @@ export function dashboardService(db: Db) {
}
const now = new Date();
const monthStart = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), 1));
const monthStart = getUtcMonthStart(now);
const runActivityDays = getRecentUtcDateKeys(now, DASHBOARD_RUN_ACTIVITY_DAYS);
const runActivityStart = new Date(`${runActivityDays[0]}T00:00:00.000Z`);
const [{ monthSpend }] = await db

View file

@ -3,7 +3,7 @@ import path from "node:path";
import { execFile as execFileCallback } from "node:child_process";
import { promisify } from "node:util";
import { randomUUID } from "node:crypto";
import { and, asc, desc, eq, getTableColumns, gt, inArray, isNull, notInArray, or, sql } from "drizzle-orm";
import { and, asc, desc, eq, getTableColumns, gt, inArray, isNull, lte, notInArray, or, sql } from "drizzle-orm";
import type { Db } from "@paperclipai/db";
import {
AGENT_DEFAULT_MAX_CONCURRENT_RUNS,
@ -134,8 +134,31 @@ const MAX_INLINE_WAKE_COMMENTS = 8;
const MAX_INLINE_WAKE_COMMENT_BODY_CHARS = 4_000;
const MAX_INLINE_WAKE_COMMENT_BODY_TOTAL_CHARS = 12_000;
const execFile = promisify(execFileCallback);
// Run statuses for a run that is waiting in the queue or currently executing.
const ACTIVE_HEARTBEAT_RUN_STATUSES = ["queued", "running"] as const;
// Statuses that count as "this issue still has an execution path": active runs
// plus retries that are scheduled but not yet due.
const EXECUTION_PATH_HEARTBEAT_RUN_STATUSES = ["queued", "running", "scheduled_retry"] as const;
// Statuses from which a run may still be cancelled; anything else is left untouched.
const CANCELLABLE_HEARTBEAT_RUN_STATUSES = ["queued", "running", "scheduled_retry"] as const;
// Terminal statuses that represent a run which did not succeed.
const UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES = ["failed", "cancelled", "timed_out"] as const;
// Base delay before each bounded retry, indexed by (attempt - 1):
// 2 minutes, 10 minutes, 30 minutes, 2 hours. The number of entries is also
// the maximum number of scheduled retry attempts.
export const BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS = [
2 * 60 * 1000,
10 * 60 * 1000,
30 * 60 * 1000,
2 * 60 * 60 * 1000,
] as const;
// Fraction of the base delay used as symmetric jitter (0.25 => base delay +/- 25%).
const BOUNDED_TRANSIENT_HEARTBEAT_RETRY_JITTER_RATIO = 0.25;
// Default `retryReason` recorded on a scheduled retry run.
const BOUNDED_TRANSIENT_HEARTBEAT_RETRY_REASON = "transient_failure";
// Default wake reason written to the retry's wakeup request and context snapshot.
const BOUNDED_TRANSIENT_HEARTBEAT_RETRY_WAKE_REASON = "transient_failure_retry";
// Derived from the delay table so the two can never drift apart.
const BOUNDED_TRANSIENT_HEARTBEAT_RETRY_MAX_ATTEMPTS = BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS.length;
/** Fallback strategies recorded on a retry after a transient Codex failure. */
type CodexTransientFallbackMode =
  | "same_session"
  | "safer_invocation"
  | "fresh_session"
  | "fresh_session_safer_invocation";

/**
 * Picks the fallback mode for a given retry attempt number.
 *
 * @param attempt - 1-based retry attempt.
 * @returns `same_session` for attempt 1 or lower, `safer_invocation` for 2,
 *   `fresh_session` for 3, and `fresh_session_safer_invocation` for anything else.
 */
function resolveCodexTransientFallbackMode(attempt: number): CodexTransientFallbackMode {
  if (attempt <= 1) {
    return "same_session";
  }
  switch (attempt) {
    case 2:
      return "safer_invocation";
    case 3:
      return "fresh_session";
    default:
      return "fresh_session_safer_invocation";
  }
}
const RUNNING_ISSUE_WAKE_REASONS_REQUIRING_FOLLOWUP = new Set(["approval_approved"]);
const SESSIONED_LOCAL_ADAPTERS = new Set([
"claude_local",
@ -211,6 +234,26 @@ export function applyRunScopedMentionedSkillKeys(
]);
}
/**
 * Computes when a bounded transient-failure retry should run.
 *
 * @param attempt - 1-based retry attempt; selects the base delay from
 *   `BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS`.
 * @param now - Reference instant the delay is added to (defaults to the current time).
 * @param random - Source of a sample in `[0, 1]`; out-of-range samples are clamped.
 * @returns The schedule (attempt, base delay, jittered delay, due time, max attempts),
 *   or `null` when `attempt` is not a positive integer or has no delay entry.
 */
export function computeBoundedTransientHeartbeatRetrySchedule(
  attempt: number,
  now = new Date(),
  random: () => number = Math.random,
) {
  const isUsableAttempt = Number.isInteger(attempt) && attempt > 0;
  if (!isUsableAttempt) return null;

  const baseDelayMs = BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS[attempt - 1];
  if (typeof baseDelayMs !== "number") return null;

  // Map the clamped sample from [0, 1] onto [-1, 1], then scale by the jitter ratio.
  const unitSample = Math.min(1, Math.max(0, random()));
  const signedOffset = (unitSample * 2) - 1;
  const jitterMultiplier = 1 + (signedOffset * BOUNDED_TRANSIENT_HEARTBEAT_RETRY_JITTER_RATIO);

  // Never schedule sooner than one second out.
  const delayMs = Math.max(1_000, Math.round(baseDelayMs * jitterMultiplier));
  const dueAt = new Date(now.getTime() + delayMs);

  return {
    attempt,
    baseDelayMs,
    delayMs,
    dueAt,
    maxAttempts: BOUNDED_TRANSIENT_HEARTBEAT_RETRY_MAX_ATTEMPTS,
  };
}
async function resolveRunScopedMentionedSkillKeys(input: {
db: Db;
companyId: string;
@ -466,6 +509,9 @@ const heartbeatRunListColumns = {
processStartedAt: heartbeatRuns.processStartedAt,
retryOfRunId: heartbeatRuns.retryOfRunId,
processLossRetryCount: heartbeatRuns.processLossRetryCount,
scheduledRetryAt: heartbeatRuns.scheduledRetryAt,
scheduledRetryAttempt: heartbeatRuns.scheduledRetryAttempt,
scheduledRetryReason: heartbeatRuns.scheduledRetryReason,
livenessState: heartbeatRuns.livenessState,
livenessReason: heartbeatRuns.livenessReason,
continuationAttempt: heartbeatRuns.continuationAttempt,
@ -1192,6 +1238,51 @@ function shouldRequireIssueCommentForWake(
);
}
/** Wake reasons that may still run against an issue whose dependencies are unresolved. */
const BLOCKED_INTERACTION_WAKE_REASONS = new Set([
  "issue_commented",
  "issue_reopened_via_comment",
  "issue_comment_mentioned",
]);

/**
 * Decides whether a wake may proceed even though the issue is dependency-blocked.
 *
 * True only when the snapshot's `wakeReason` is one of the comment-driven
 * reasons above AND a comment id can be derived from the snapshot.
 *
 * @param contextSnapshot - Run or wakeup context snapshot; may be absent.
 */
function allowsBlockedIssueInteractionWake(
  contextSnapshot: Record<string, unknown> | null | undefined,
) {
  const reason = readNonEmptyString(contextSnapshot?.wakeReason);
  const isInteractionReason = reason ? BLOCKED_INTERACTION_WAKE_REASONS.has(reason) : false;
  if (!isInteractionReason) {
    return false;
  }
  const commentId = deriveCommentId(contextSnapshot, null);
  return Boolean(commentId);
}
/**
 * Loads display summaries for the issues that still block `issueId`.
 *
 * @param dbOrTx - Database handle or open transaction used for the read.
 * @param companyId - Company the `blocks` relations must belong to.
 * @param issueId - The blocked issue.
 * @param unresolvedBlockerIssueIds - Candidate blocker ids; falsy entries and
 *   duplicates are ignored.
 * @returns One row per matching blocker, ordered by title; an empty array when
 *   no usable ids were supplied (no query is issued in that case).
 */
async function listUnresolvedBlockerSummaries(
  dbOrTx: Pick<Db, "select">,
  companyId: string,
  issueId: string,
  unresolvedBlockerIssueIds: string[],
) {
  const blockerIds = Array.from(new Set(unresolvedBlockerIssueIds.filter(Boolean)));
  if (blockerIds.length === 0) return [];

  const summaryColumns = {
    id: issues.id,
    identifier: issues.identifier,
    title: issues.title,
    status: issues.status,
    priority: issues.priority,
    assigneeAgentId: issues.assigneeAgentId,
    assigneeUserId: issues.assigneeUserId,
  };

  // Only "blocks" relations in this company that point at `issueId`, limited
  // to the requested blocker ids.
  const blockerFilter = and(
    eq(issueRelations.companyId, companyId),
    eq(issueRelations.type, "blocks"),
    eq(issueRelations.relatedIssueId, issueId),
    inArray(issues.id, blockerIds),
  );

  return dbOrTx
    .select(summaryColumns)
    .from(issueRelations)
    .innerJoin(issues, eq(issueRelations.issueId, issues.id))
    .where(blockerFilter)
    .orderBy(asc(issues.title));
}
export function formatRuntimeWorkspaceWarningLog(warning: string) {
return {
stream: "stdout" as const,
@ -1525,6 +1616,13 @@ async function buildPaperclipWakePayload(input: {
}
: null,
checkedOutByHarness: input.contextSnapshot[PAPERCLIP_HARNESS_CHECKOUT_KEY] === true,
dependencyBlockedInteraction: input.contextSnapshot.dependencyBlockedInteraction === true,
unresolvedBlockerIssueIds: Array.isArray(input.contextSnapshot.unresolvedBlockerIssueIds)
? input.contextSnapshot.unresolvedBlockerIssueIds.filter((value): value is string => typeof value === "string" && value.length > 0)
: [],
unresolvedBlockerSummaries: Array.isArray(input.contextSnapshot.unresolvedBlockerSummaries)
? input.contextSnapshot.unresolvedBlockerSummaries
: [],
executionStage: Object.keys(executionStage).length > 0 ? executionStage : null,
continuationSummary: continuationSummary
? {
@ -3057,6 +3155,219 @@ export function heartbeatService(db: Db) {
return queued;
}
/**
 * Schedules the next bounded retry for `run`, or records that retries are exhausted.
 *
 * The next attempt number is `run.scheduledRetryAttempt + 1`. When the delay
 * table has no entry for that attempt, a "Bounded retry exhausted" lifecycle
 * event is appended to `run` and nothing is queued. Otherwise a wakeup request
 * and a new heartbeat run in status `scheduled_retry` are inserted in one
 * transaction, and a lifecycle event describing the schedule is appended to `run`.
 *
 * @param run - The run being retried.
 * @param agent - The agent that owns `run`.
 * @param opts.now - Reference time for the schedule and row timestamps (defaults to now).
 * @param opts.random - Jitter source forwarded to the schedule computation.
 * @param opts.retryReason - Stored as the retry reason (defaults to the transient-failure reason).
 * @param opts.wakeReason - Stored as the wakeup reason (defaults to the transient-failure wake reason).
 * @returns `{ outcome: "retry_exhausted", attempt, maxAttempts }` or
 *   `{ outcome: "scheduled", run, dueAt, attempt, maxAttempts }`.
 */
async function scheduleBoundedRetryForRun(
run: typeof heartbeatRuns.$inferSelect,
agent: typeof agents.$inferSelect,
opts?: {
now?: Date;
random?: () => number;
retryReason?: string;
wakeReason?: string;
},
) {
const now = opts?.now ?? new Date();
const retryReason = opts?.retryReason ?? BOUNDED_TRANSIENT_HEARTBEAT_RETRY_REASON;
const wakeReason = opts?.wakeReason ?? BOUNDED_TRANSIENT_HEARTBEAT_RETRY_WAKE_REASON;
const nextAttempt = (run.scheduledRetryAttempt ?? 0) + 1;
// `schedule` is null once `nextAttempt` is past the end of the delay table.
const schedule = computeBoundedTransientHeartbeatRetrySchedule(nextAttempt, now, opts?.random);
// A fallback mode is attached only for codex_local agents retrying the default
// transient-failure reason after a `codex_transient_upstream` error.
const codexTransientFallbackMode =
agent.adapterType === "codex_local" && retryReason === BOUNDED_TRANSIENT_HEARTBEAT_RETRY_REASON && run.errorCode === "codex_transient_upstream"
? resolveCodexTransientFallbackMode(nextAttempt)
: null;
if (!schedule) {
// NOTE: other queries in this codebase find this event with
// `message like 'Bounded retry exhausted%'`; keep the message prefix stable.
await appendRunEvent(run, await nextRunEventSeq(run.id), {
eventType: "lifecycle",
stream: "system",
level: "warn",
message: `Bounded retry exhausted after ${run.scheduledRetryAttempt ?? 0} scheduled attempts; no further automatic retry will be queued`,
payload: {
retryReason,
scheduledRetryAttempt: run.scheduledRetryAttempt ?? 0,
maxAttempts: BOUNDED_TRANSIENT_HEARTBEAT_RETRY_MAX_ATTEMPTS,
},
});
return {
outcome: "retry_exhausted" as const,
attempt: nextAttempt,
maxAttempts: BOUNDED_TRANSIENT_HEARTBEAT_RETRY_MAX_ATTEMPTS,
};
}
const contextSnapshot = parseObject(run.contextSnapshot);
const issueId = readNonEmptyString(contextSnapshot.issueId);
const taskKey = deriveTaskKeyWithHeartbeatFallback(contextSnapshot, null);
const sessionBefore = await resolveSessionBeforeForWakeup(agent, taskKey);
// The retry inherits the failed run's context, overlaid with retry bookkeeping.
const retryContextSnapshot: Record<string, unknown> = {
...contextSnapshot,
retryOfRunId: run.id,
wakeReason,
retryReason,
scheduledRetryAttempt: schedule.attempt,
scheduledRetryAt: schedule.dueAt.toISOString(),
...(codexTransientFallbackMode ? { codexTransientFallbackMode } : {}),
};
// Wakeup request, scheduled run, their back-link, and the issue lock hand-off
// are written in one transaction.
const retryRun = await db.transaction(async (tx) => {
const wakeupRequest = await tx
.insert(agentWakeupRequests)
.values({
companyId: run.companyId,
agentId: run.agentId,
source: "automation",
triggerDetail: "system",
reason: wakeReason,
payload: {
...(issueId ? { issueId } : {}),
retryOfRunId: run.id,
retryReason,
scheduledRetryAttempt: schedule.attempt,
scheduledRetryAt: schedule.dueAt.toISOString(),
...(codexTransientFallbackMode ? { codexTransientFallbackMode } : {}),
},
status: "queued",
requestedByActorType: "system",
requestedByActorId: null,
updatedAt: now,
})
.returning()
.then((rows) => rows[0]);
// The run is parked as `scheduled_retry` rather than `queued`.
const scheduledRun = await tx
.insert(heartbeatRuns)
.values({
companyId: run.companyId,
agentId: run.agentId,
invocationSource: "automation",
triggerDetail: "system",
status: "scheduled_retry",
wakeupRequestId: wakeupRequest.id,
contextSnapshot: retryContextSnapshot,
sessionIdBefore: sessionBefore,
retryOfRunId: run.id,
scheduledRetryAt: schedule.dueAt,
scheduledRetryAttempt: schedule.attempt,
scheduledRetryReason: retryReason,
continuationAttempt: readContinuationAttempt(retryContextSnapshot.livenessContinuationAttempt),
updatedAt: now,
})
.returning()
.then((rows) => rows[0]);
// Link the wakeup request back to the run it produced.
await tx
.update(agentWakeupRequests)
.set({
runId: scheduledRun.id,
updatedAt: now,
})
.where(eq(agentWakeupRequests.id, wakeupRequest.id));
if (issueId) {
// Hand the issue's execution lock to the retry, but only if the failed run
// still holds it (the `executionRunId = run.id` guard).
await tx
.update(issues)
.set({
executionRunId: scheduledRun.id,
executionAgentNameKey: normalizeAgentNameKey(agent.name),
executionLockedAt: now,
updatedAt: now,
})
.where(and(eq(issues.id, issueId), eq(issues.companyId, run.companyId), eq(issues.executionRunId, run.id)));
}
return scheduledRun;
});
// The schedule is recorded on the ORIGINAL run's event stream, after the commit.
await appendRunEvent(run, await nextRunEventSeq(run.id), {
eventType: "lifecycle",
stream: "system",
level: "warn",
message: `Scheduled bounded retry ${schedule.attempt}/${schedule.maxAttempts} for ${schedule.dueAt.toISOString()}`,
payload: {
retryRunId: retryRun.id,
retryReason,
scheduledRetryAttempt: schedule.attempt,
scheduledRetryAt: schedule.dueAt.toISOString(),
baseDelayMs: schedule.baseDelayMs,
delayMs: schedule.delayMs,
...(codexTransientFallbackMode ? { codexTransientFallbackMode } : {}),
},
});
return {
outcome: "scheduled" as const,
run: retryRun,
dueAt: schedule.dueAt,
attempt: schedule.attempt,
maxAttempts: schedule.maxAttempts,
};
}
/**
 * Moves scheduled retries whose due time has passed into the `queued` pool.
 *
 * At most 50 due runs are considered per call, oldest due time first. Each
 * promotion is a conditional update that re-checks status and due time, so a
 * run that was changed in the meantime is skipped.
 *
 * @param now - Instant used for the due comparison and `updatedAt` (defaults to now).
 * @returns The number of runs promoted and their ids.
 */
async function promoteDueScheduledRetries(now = new Date()) {
  const candidates = await db
    .select()
    .from(heartbeatRuns)
    .where(
      and(
        eq(heartbeatRuns.status, "scheduled_retry"),
        lte(heartbeatRuns.scheduledRetryAt, now),
      ),
    )
    .orderBy(asc(heartbeatRuns.scheduledRetryAt), asc(heartbeatRuns.createdAt), asc(heartbeatRuns.id))
    .limit(50);

  const runIds: string[] = [];

  for (const candidate of candidates) {
    // Conditional update: only succeeds if the row is still a due scheduled retry.
    const updatedRows = await db
      .update(heartbeatRuns)
      .set({
        status: "queued",
        updatedAt: now,
      })
      .where(
        and(
          eq(heartbeatRuns.id, candidate.id),
          eq(heartbeatRuns.status, "scheduled_retry"),
          lte(heartbeatRuns.scheduledRetryAt, now),
        ),
      )
      .returning();
    const promoted = updatedRows[0] ?? null;
    if (promoted === null) continue;

    runIds.push(promoted.id);

    const dueAtIso = promoted.scheduledRetryAt
      ? new Date(promoted.scheduledRetryAt).toISOString()
      : null;
    const eventSeq = await nextRunEventSeq(promoted.id);
    await appendRunEvent(promoted, eventSeq, {
      eventType: "lifecycle",
      stream: "system",
      level: "info",
      message: "Scheduled retry became due and was promoted to the queued run pool",
      payload: {
        scheduledRetryAttempt: promoted.scheduledRetryAttempt,
        scheduledRetryAt: dueAtIso,
        scheduledRetryReason: promoted.scheduledRetryReason,
      },
    });

    publishLiveEvent({
      companyId: promoted.companyId,
      type: "heartbeat.run.queued",
      payload: {
        runId: promoted.id,
        agentId: promoted.agentId,
        invocationSource: promoted.invocationSource,
        triggerDetail: promoted.triggerDetail,
        wakeupRequestId: promoted.wakeupRequestId,
      },
    });
  }

  return {
    promoted: runIds.length,
    runIds,
  };
}
function parseHeartbeatPolicy(agent: typeof agents.$inferSelect) {
const runtimeConfig = parseObject(agent.runtimeConfig);
const heartbeat = parseObject(runtimeConfig.heartbeat);
@ -3133,7 +3444,7 @@ export function heartbeatService(db: Db) {
if (issueId) {
const dependencyReadiness = await issuesSvc.listDependencyReadiness(run.companyId, [issueId]);
const unresolvedBlockerCount = dependencyReadiness.get(issueId)?.unresolvedBlockerCount ?? 0;
if (unresolvedBlockerCount > 0) {
if (unresolvedBlockerCount > 0 && !allowsBlockedIssueInteractionWake(context)) {
logger.debug({ runId: run.id, issueId, unresolvedBlockerCount }, "claimQueuedRun: skipping blocked run");
return null;
}
@ -3600,7 +3911,7 @@ export function heartbeatService(db: Db) {
.where(
and(
eq(heartbeatRuns.companyId, companyId),
inArray(heartbeatRuns.status, [...ACTIVE_HEARTBEAT_RUN_STATUSES]),
inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES]),
sql`${heartbeatRuns.contextSnapshot} ->> 'issueId' = ${issueId}`,
),
)
@ -3666,6 +3977,147 @@ export function heartbeatService(db: Db) {
return queued;
}
/**
 * Renders up to five distinct issue identifiers as markdown links for a comment.
 *
 * @param relations - Related issues; entries without an identifier are ignored.
 * @returns Comma-separated `[ID](/PREFIX/issues/ID)` links in first-seen order,
 *   or the phrase "another open issue" when no identifier is available. The
 *   prefix is the text before the first "-" and falls back to "PAP" when empty.
 */
function formatIssueLinksForComment(relations: Array<{ identifier?: string | null }>) {
  const uniqueIdentifiers = new Set<string>();
  for (const relation of relations) {
    const candidate = relation.identifier;
    if (candidate) uniqueIdentifiers.add(candidate);
  }
  if (uniqueIdentifiers.size === 0) return "another open issue";

  const links: string[] = [];
  for (const identifier of uniqueIdentifiers) {
    if (links.length >= 5) break;
    const prefix = identifier.split("-")[0] || "PAP";
    links.push(`[${identifier}](/${prefix}/issues/${identifier})`);
  }
  return links.join(", ");
}
/**
 * Recovers "orphan blockers": issues that block other open work but have no assignee.
 *
 * Candidates are agent-created issues in status `todo` or `blocked`, with no
 * agent or user assignee, that are the source of a `blocks` relation whose
 * target issue (same company) is neither `done` nor `cancelled`. Each candidate
 * is assigned to the agent that created it, gets an explanatory comment and an
 * activity-log entry, and a wakeup is enqueued for that agent.
 *
 * @returns `assigned` (candidates whose wakeup was queued), `skipped`, and the
 *   ids of the issues counted as assigned.
 */
async function reconcileUnassignedBlockingIssues() {
const candidates = await db
.select({
id: issues.id,
companyId: issues.companyId,
identifier: issues.identifier,
status: issues.status,
createdByAgentId: issues.createdByAgentId,
})
.from(issueRelations)
.innerJoin(issues, eq(issueRelations.issueId, issues.id))
.where(
and(
eq(issueRelations.type, "blocks"),
inArray(issues.status, ["todo", "blocked"]),
isNull(issues.assigneeAgentId),
isNull(issues.assigneeUserId),
sql`${issues.createdByAgentId} is not null`,
sql`exists (
select 1
from issues blocked_issue
where blocked_issue.id = ${issueRelations.relatedIssueId}
and blocked_issue.company_id = ${issues.companyId}
and blocked_issue.status not in ('done', 'cancelled')
)`,
),
);
let assigned = 0;
let skipped = 0;
const issueIds: string[] = [];
// The join yields one row per relation, so an issue that blocks several
// others appears more than once; process each issue a single time.
const seen = new Set<string>();
for (const candidate of candidates) {
if (seen.has(candidate.id)) continue;
seen.add(candidate.id);
const creatorAgentId = candidate.createdByAgentId;
if (!creatorAgentId) {
skipped += 1;
continue;
}
// Skip when the creator is missing, belongs to another company, or is in a
// paused / terminated / pending_approval state.
const creatorAgent = await getAgent(creatorAgentId);
if (
!creatorAgent ||
creatorAgent.companyId !== candidate.companyId ||
creatorAgent.status === "paused" ||
creatorAgent.status === "terminated" ||
creatorAgent.status === "pending_approval"
) {
skipped += 1;
continue;
}
// Link text is built before the update and used in the comment below.
const relations = await issuesSvc.getRelationSummaries(candidate.id);
const blockingLinks = formatIssueLinksForComment(relations.blocks);
const updated = await issuesSvc.update(candidate.id, {
assigneeAgentId: creatorAgent.id,
assigneeUserId: null,
});
if (!updated) {
skipped += 1;
continue;
}
await issuesSvc.addComment(
candidate.id,
[
"## Assigned Orphan Blocker",
"",
`Paperclip found this issue is blocking ${blockingLinks} but had no assignee, so no heartbeat could pick it up.`,
"",
"- Assigned it back to the agent that created the blocker.",
"- Next action: resolve this blocker or reassign it to the right owner.",
].join("\n"),
{},
);
await logActivity(db, {
companyId: candidate.companyId,
actorType: "system",
actorId: "system",
agentId: null,
runId: null,
action: "issue.updated",
entityType: "issue",
entityId: candidate.id,
details: {
identifier: candidate.identifier,
assigneeAgentId: creatorAgent.id,
source: "heartbeat.reconcile_unassigned_blocking_issue",
},
});
const queued = await enqueueWakeup(creatorAgent.id, {
source: "automation",
triggerDetail: "system",
reason: "issue_assigned",
payload: {
issueId: candidate.id,
mutation: "unassigned_blocker_recovery",
},
requestedByActorType: "system",
requestedByActorId: null,
contextSnapshot: {
issueId: candidate.id,
taskId: candidate.id,
wakeReason: "issue_assigned",
source: "issue.unassigned_blocker_recovery",
},
});
// NOTE(review): the assignment, comment and activity entry above are already
// written at this point; when no wakeup is queued the issue is still counted
// as skipped. Confirm that is the intended accounting.
if (queued) {
assigned += 1;
issueIds.push(candidate.id);
} else {
skipped += 1;
}
}
return { assigned, skipped, issueIds };
}
async function escalateStrandedAssignedIssue(input: {
issue: typeof issues.$inferSelect;
previousStatus: "todo" | "in_progress";
@ -3720,6 +4172,7 @@ export function heartbeatService(db: Db) {
const result = {
dispatchRequeued: 0,
continuationRequeued: 0,
orphanBlockersAssigned: 0,
escalated: 0,
skipped: 0,
issueIds: [] as string[],
@ -3795,7 +4248,6 @@ export function heartbeatService(db: Db) {
result.skipped += 1;
continue;
}
if (didAutomaticRecoveryFail(latestRun, "issue_continuation_needed")) {
const failureSummary = summarizeRunFailureForIssueComment(latestRun);
const updated = await escalateStrandedAssignedIssue({
@ -3832,6 +4284,11 @@ export function heartbeatService(db: Db) {
}
}
const orphanBlockerRecovery = await reconcileUnassignedBlockingIssues();
result.orphanBlockersAssigned = orphanBlockerRecovery.assigned;
result.skipped += orphanBlockerRecovery.skipped;
result.issueIds.push(...orphanBlockerRecovery.issueIds);
return result;
}
@ -3895,7 +4352,7 @@ export function heartbeatService(db: Db) {
contextSnapshot: heartbeatRuns.contextSnapshot,
})
.from(heartbeatRuns)
.where(inArray(heartbeatRuns.status, [...ACTIVE_HEARTBEAT_RUN_STATUSES])),
.where(inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES])),
db
.select({
companyId: agentWakeupRequests.companyId,
@ -5209,6 +5666,9 @@ export function heartbeatService(db: Db) {
);
}
}
if (outcome === "failed" && livenessRun.errorCode === "codex_transient_upstream") {
await scheduleBoundedRetryForRun(livenessRun, agent);
}
await finalizeIssueCommentPolicy(livenessRun, agent);
await releaseIssueExecutionAndPromote(livenessRun);
await handleRunLivenessContinuation(livenessRun);
@ -5360,9 +5820,41 @@ export function heartbeatService(db: Db) {
}
}
/**
 * Builds the issue comment posted when terminal-run recovery leaves an assigned
 * issue without a live execution path and it is moved to `blocked`.
 *
 * @param input.status - Status the issue had before being blocked; `todo`
 *   produces the "dispatch" wording, anything else the "continuation" wording.
 * @param input.latestRun - Run whose failure summary, if any, is embedded in the text.
 * @returns The comment body.
 */
function buildImmediateExecutionPathRecoveryComment(input: {
  status: "todo" | "in_progress";
  latestRun: Pick<typeof heartbeatRuns.$inferSelect, "error" | "errorCode"> | null | undefined;
}) {
  const failureDetail = summarizeRunFailureForIssueComment(input.latestRun) ?? "";
  const wording =
    input.status === "todo"
      ? { action: "dispatch", label: "todo" }
      : { action: "continuation", label: "in_progress" };

  return [
    `Paperclip automatically retried ${wording.action} for this assigned \`${wording.label}\` issue during terminal run recovery, `,
    `but it still has no live execution path.${failureDetail} `,
    "Moving it to `blocked` so it is visible for intervention.",
  ].join("");
}
async function releaseIssueExecutionAndPromote(run: typeof heartbeatRuns.$inferSelect) {
const runContext = parseObject(run.contextSnapshot);
const contextIssueId = readNonEmptyString(runContext.issueId);
const taskKey = deriveTaskKeyWithHeartbeatFallback(runContext, null);
const recoveryAgent = await getAgent(run.agentId);
const recoveryAgentInvokable =
recoveryAgent &&
recoveryAgent.status !== "paused" &&
recoveryAgent.status !== "terminated" &&
recoveryAgent.status !== "pending_approval";
const recoverySessionBefore = recoveryAgentInvokable
? await resolveSessionBeforeForWakeup(recoveryAgent, taskKey)
: null;
const recoveryAgentNameKey = normalizeAgentNameKey(recoveryAgent?.name);
const promotionResult = await db.transaction(async (tx) => {
if (contextIssueId) {
await tx.execute(
@ -5380,6 +5872,8 @@ export function heartbeatService(db: Db) {
companyId: issues.companyId,
identifier: issues.identifier,
status: issues.status,
assigneeAgentId: issues.assigneeAgentId,
assigneeUserId: issues.assigneeUserId,
executionRunId: issues.executionRunId,
})
.from(issues)
@ -5421,7 +5915,7 @@ export function heartbeatService(db: Db) {
.limit(1)
.then((rows) => rows[0] ?? null);
if (!deferred) return null;
if (!deferred) break;
const deferredAgent = await tx
.select()
@ -5562,16 +6056,165 @@ export function heartbeatService(db: Db) {
.where(eq(issues.id, issue.id));
return {
kind: "promoted" as const,
run: newRun,
reopenedActivity,
};
}
const issueNeedsImmediateRecovery =
(issue.status === "todo" || issue.status === "in_progress") &&
!issue.assigneeUserId &&
issue.assigneeAgentId === run.agentId &&
(run.status === "failed" || run.status === "timed_out" || run.status === "cancelled");
if (!issueNeedsImmediateRecovery) {
return { kind: "released" as const };
}
const existingExecutionPath = await tx
.select({ id: heartbeatRuns.id })
.from(heartbeatRuns)
.where(
and(
eq(heartbeatRuns.companyId, issue.companyId),
inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES]),
sql`${heartbeatRuns.contextSnapshot} ->> 'issueId' = ${issue.id}`,
sql`${heartbeatRuns.id} <> ${run.id}`,
),
)
.limit(1)
.then((rows) => rows[0] ?? null);
if (existingExecutionPath) {
return { kind: "released" as const };
}
const shouldBlockImmediately =
!recoveryAgentInvokable ||
!recoveryAgent ||
didAutomaticRecoveryFail(run, issue.status === "todo" ? "assignment_recovery" : "issue_continuation_needed");
if (shouldBlockImmediately) {
const comment = buildImmediateExecutionPathRecoveryComment({
status: issue.status as "todo" | "in_progress",
latestRun: run,
});
await tx
.update(issues)
.set({
status: "blocked",
updatedAt: new Date(),
})
.where(eq(issues.id, issue.id));
return {
kind: "blocked" as const,
issueId: issue.id,
issueIdentifier: issue.identifier,
previousStatus: issue.status,
comment,
};
}
const retryReason = issue.status === "todo" ? "assignment_recovery" : "issue_continuation_needed";
const recoveryReason = issue.status === "todo" ? "issue_assignment_recovery" : "issue_continuation_needed";
const recoverySource =
issue.status === "todo" ? "issue.assignment_recovery" : "issue.continuation_recovery";
const now = new Date();
const wakeupRequest = await tx
.insert(agentWakeupRequests)
.values({
companyId: issue.companyId,
agentId: recoveryAgent.id,
source: "automation",
triggerDetail: "system",
reason: recoveryReason,
payload: {
issueId: issue.id,
retryOfRunId: run.id,
},
status: "queued",
requestedByActorType: "system",
requestedByActorId: null,
updatedAt: now,
})
.returning()
.then((rows) => rows[0]);
const queuedRun = await tx
.insert(heartbeatRuns)
.values({
companyId: issue.companyId,
agentId: recoveryAgent.id,
invocationSource: "automation",
triggerDetail: "system",
status: "queued",
wakeupRequestId: wakeupRequest.id,
contextSnapshot: {
issueId: issue.id,
taskId: issue.id,
wakeReason: recoveryReason,
retryReason,
source: recoverySource,
retryOfRunId: run.id,
},
sessionIdBefore: recoverySessionBefore,
retryOfRunId: run.id,
updatedAt: now,
})
.returning()
.then((rows) => rows[0]);
await tx
.update(agentWakeupRequests)
.set({
runId: queuedRun.id,
updatedAt: now,
})
.where(eq(agentWakeupRequests.id, wakeupRequest.id));
await tx
.update(issues)
.set({
executionRunId: queuedRun.id,
executionAgentNameKey: recoveryAgentNameKey,
executionLockedAt: now,
updatedAt: now,
})
.where(eq(issues.id, issue.id));
return {
kind: "queued_recovery" as const,
run: queuedRun,
};
});
if (promotionResult?.kind === "blocked") {
await issuesSvc.addComment(promotionResult.issueId, promotionResult.comment, {});
await logActivity(db, {
companyId: run.companyId,
actorType: "system",
actorId: "system",
agentId: null,
runId: run.id,
action: "issue.updated",
entityType: "issue",
entityId: promotionResult.issueId,
details: {
identifier: promotionResult.issueIdentifier,
status: "blocked",
previousStatus: promotionResult.previousStatus,
source: "heartbeat.release_issue_execution_and_promote",
latestRunId: run.id,
latestRunStatus: run.status,
latestRunErrorCode: run.errorCode ?? null,
},
});
return;
}
const promotedRun = promotionResult?.run ?? null;
if (!promotedRun) return;
if (promotionResult?.reopenedActivity) {
if (promotionResult?.kind === "promoted" && promotionResult.reopenedActivity) {
await logActivity(db, promotionResult.reopenedActivity);
}
@ -5737,7 +6380,12 @@ export function heartbeatService(db: Db) {
.then((rows) => rows[0] ?? null)
: null;
if (activeExecutionRun && activeExecutionRun.status !== "queued" && activeExecutionRun.status !== "running") {
if (
activeExecutionRun &&
!EXECUTION_PATH_HEARTBEAT_RUN_STATUSES.includes(
activeExecutionRun.status as (typeof EXECUTION_PATH_HEARTBEAT_RUN_STATUSES)[number],
)
) {
activeExecutionRun = null;
}
@ -5760,7 +6408,7 @@ export function heartbeatService(db: Db) {
.where(
and(
eq(heartbeatRuns.companyId, issue.companyId),
inArray(heartbeatRuns.status, ["queued", "running"]),
inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES]),
sql`${heartbeatRuns.contextSnapshot} ->> 'issueId' = ${issue.id}`,
),
)
@ -5790,6 +6438,53 @@ export function heartbeatService(db: Db) {
}
}
const dependencyReadiness = await issuesSvc.listDependencyReadiness(
issue.companyId,
[issue.id],
tx,
).then((rows) => rows.get(issue.id) ?? null);
// Blocked descendants should stay idle until the final blocker resolves.
// Human comment/mention wakes are the exception: they may run in a
// bounded interaction mode so the assignee can answer or triage.
const blockedInteractionWake =
dependencyReadiness &&
!dependencyReadiness.isDependencyReady &&
allowsBlockedIssueInteractionWake(enrichedContextSnapshot);
if (blockedInteractionWake) {
enrichedContextSnapshot.dependencyBlockedInteraction = true;
enrichedContextSnapshot.unresolvedBlockerIssueIds = dependencyReadiness.unresolvedBlockerIssueIds;
enrichedContextSnapshot.unresolvedBlockerCount = dependencyReadiness.unresolvedBlockerCount;
enrichedContextSnapshot.unresolvedBlockerSummaries = await listUnresolvedBlockerSummaries(
tx,
issue.companyId,
issue.id,
dependencyReadiness.unresolvedBlockerIssueIds,
);
}
if (!activeExecutionRun && dependencyReadiness && !dependencyReadiness.isDependencyReady && !blockedInteractionWake) {
await tx.insert(agentWakeupRequests).values({
companyId: agent.companyId,
agentId,
source,
triggerDetail,
reason: "issue_dependencies_blocked",
payload: {
...(payload ?? {}),
issueId,
unresolvedBlockerIssueIds: dependencyReadiness.unresolvedBlockerIssueIds,
},
status: "skipped",
requestedByActorType: opts.requestedByActorType ?? null,
requestedByActorId: opts.requestedByActorId ?? null,
idempotencyKey: opts.idempotencyKey ?? null,
finishedAt: new Date(),
});
return { kind: "skipped" as const };
}
if (activeExecutionRun) {
const executionAgent = await tx
.select({ name: agents.name })
@ -5977,12 +6672,15 @@ export function heartbeatService(db: Db) {
const activeRuns = await db
.select()
.from(heartbeatRuns)
.where(and(eq(heartbeatRuns.agentId, agentId), inArray(heartbeatRuns.status, ["queued", "running"])))
.where(and(eq(heartbeatRuns.agentId, agentId), inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES])))
.orderBy(desc(heartbeatRuns.createdAt));
const sameScopeQueuedRun = activeRuns.find(
(candidate) => candidate.status === "queued" && isSameTaskScope(runTaskKey(candidate), taskKey),
);
const sameScopeScheduledRetryRun = activeRuns.find(
(candidate) => candidate.status === "scheduled_retry" && isSameTaskScope(runTaskKey(candidate), taskKey),
);
const sameScopeRunningRun = activeRuns.find(
(candidate) => candidate.status === "running" && isSameTaskScope(runTaskKey(candidate), taskKey),
);
@ -5993,6 +6691,7 @@ export function heartbeatService(db: Db) {
const coalescedTargetRun =
sameScopeQueuedRun ??
sameScopeScheduledRetryRun ??
(shouldQueueFollowupForRunningWake ? null : sameScopeRunningRun ?? null);
if (coalescedTargetRun) {
@ -6103,7 +6802,7 @@ export function heartbeatService(db: Db) {
.where(
and(
eq(heartbeatRuns.companyId, companyId),
inArray(heartbeatRuns.status, ["queued", "running"]),
inArray(heartbeatRuns.status, [...CANCELLABLE_HEARTBEAT_RUN_STATUSES]),
sql`${effectiveProjectId} = ${projectId}`,
),
);
@ -6188,7 +6887,7 @@ export function heartbeatService(db: Db) {
async function cancelRunInternal(runId: string, reason = "Cancelled by control plane") {
const run = await getRun(runId);
if (!run) throw notFound("Heartbeat run not found");
if (run.status !== "running" && run.status !== "queued") return run;
if (!CANCELLABLE_HEARTBEAT_RUN_STATUSES.includes(run.status as (typeof CANCELLABLE_HEARTBEAT_RUN_STATUSES)[number])) return run;
const agent = await getAgent(run.agentId);
const running = runningProcesses.get(run.id);
@ -6244,7 +6943,7 @@ export function heartbeatService(db: Db) {
const runs = await db
.select()
.from(heartbeatRuns)
.where(and(eq(heartbeatRuns.agentId, agentId), inArray(heartbeatRuns.status, ["queued", "running"])));
.where(and(eq(heartbeatRuns.agentId, agentId), inArray(heartbeatRuns.status, [...CANCELLABLE_HEARTBEAT_RUN_STATUSES])));
for (const run of runs) {
await setRunStatus(run.id, "cancelled", {
@ -6300,7 +6999,7 @@ export function heartbeatService(db: Db) {
.where(
and(
eq(heartbeatRuns.companyId, scope.companyId),
inArray(heartbeatRuns.status, ["queued", "running"]),
inArray(heartbeatRuns.status, [...CANCELLABLE_HEARTBEAT_RUN_STATUSES]),
),
)
.then((rows) => rows.map((row) => row.id))
@ -6471,6 +7170,25 @@ export function heartbeatService(db: Db) {
.orderBy(asc(heartbeatRunEvents.seq))
.limit(Math.max(1, Math.min(limit, 1000))),
/**
 * Returns the message of the most recent "Bounded retry exhausted" lifecycle
 * event recorded for a run (highest event id), or `null` when there is none.
 */
getRetryExhaustedReason: async (runId: string) => {
  const [latestExhaustionEvent] = await db
    .select({
      message: heartbeatRunEvents.message,
    })
    .from(heartbeatRunEvents)
    .where(
      and(
        eq(heartbeatRunEvents.runId, runId),
        eq(heartbeatRunEvents.eventType, "lifecycle"),
        sql`${heartbeatRunEvents.message} like 'Bounded retry exhausted%'`,
      ),
    )
    .orderBy(desc(heartbeatRunEvents.id))
    .limit(1);
  return latestExhaustionEvent?.message ?? null;
},
readLog: async (
runOrLookup: string | {
id: string;
@ -6525,8 +7243,26 @@ export function heartbeatService(db: Db) {
reapOrphanedRuns,
promoteDueScheduledRetries,
resumeQueuedRuns,
/**
 * Public entry point for scheduling a bounded retry by run id.
 *
 * Resolves the run and its agent, then delegates to `scheduleBoundedRetryForRun`.
 * Returns `{ outcome: "missing_run" }` or `{ outcome: "missing_agent" }` when
 * either lookup comes back empty.
 */
scheduleBoundedRetry: async (
  runId: string,
  opts?: {
    now?: Date;
    random?: () => number;
    retryReason?: string;
    wakeReason?: string;
  },
) => {
  const run = await getRun(runId, { unsafeFullResultJson: true });
  if (run) {
    const agent = await getAgent(run.agentId);
    if (agent) {
      return scheduleBoundedRetryForRun(run, agent, opts);
    }
    return { outcome: "missing_agent" as const };
  }
  return { outcome: "missing_run" as const };
},
reconcileStrandedAssignedIssues,
reconcileIssueGraphLiveness,

View file

@ -1,3 +1,4 @@
import { Buffer } from "node:buffer";
import { and, asc, desc, eq, inArray, isNull, ne, or, sql } from "drizzle-orm";
import type { Db } from "@paperclipai/db";
import {
@ -79,6 +80,7 @@ export interface IssueFilters {
inboxArchivedByUserId?: string;
unreadForUserId?: string;
projectId?: string;
workspaceId?: string;
executionWorkspaceId?: string;
parentId?: string;
labelId?: string;
@ -168,6 +170,7 @@ function sameRunLock(checkoutRunId: string | null, actorRunId: string | null) {
const TERMINAL_HEARTBEAT_RUN_STATUSES = new Set(["succeeded", "failed", "cancelled", "timed_out"]);
// Maximum number of characters of an issue description kept in list rows
// (passed as the cap when the fetched preview is decoded).
const ISSUE_LIST_DESCRIPTION_MAX_CHARS = 1200;
// Number of encoded bytes of the description read by the list query's
// `substring(... FROM 1 FOR ...)`.
// NOTE(review): the factor of 4 presumably reflects the 4-byte maximum of a
// UTF-8 encoded code point; this assumes `server_encoding` is UTF8 — confirm.
const ISSUE_LIST_DESCRIPTION_MAX_BYTES = ISSUE_LIST_DESCRIPTION_MAX_CHARS * 4;
function escapeLikePattern(value: string): string {
return value.replace(/[\\%_]/g, "\\$&");
@ -191,6 +194,16 @@ function truncateInlineSummary(value: string | null | undefined, maxChars = CHIL
return normalized.length > maxChars ? `${normalized.slice(0, Math.max(0, maxChars - 15)).trimEnd()} [truncated]` : normalized;
}
/**
 * Shortens a string to at most `maxChars` Unicode code points, never
 * splitting a surrogate pair.
 *
 * @param value - Text to shorten.
 * @param maxChars - Maximum number of code points to keep.
 * @returns `value` unchanged when it already fits, otherwise its leading
 *   `maxChars` code points.
 */
function truncateByCodePoint(value: string, maxChars: number): string {
  // The UTF-16 length is an upper bound on the code point count, so a string
  // that passes this check cannot exceed the limit.
  const alreadyFits = value.length <= maxChars;
  if (alreadyFits) {
    return value;
  }
  // String iteration yields whole code points, not UTF-16 code units.
  const codePoints = [...value];
  return codePoints.slice(0, maxChars).join("");
}
/**
 * Decodes a base64-encoded text preview and caps it at `maxChars` code
 * points.
 *
 * @param value - Base64 text, or `null`/`undefined` when there is no text.
 * @param maxChars - Maximum number of code points to return.
 * @returns The decoded (UTF-8) and truncated text, or `null` when `value`
 *   is nullish.
 */
function decodeDatabaseTextPreview(value: string | null | undefined, maxChars: number): string | null {
  if (value === null || value === undefined) {
    return null;
  }
  const decodedText = Buffer.from(value, "base64").toString("utf8");
  return truncateByCodePoint(decodedText, maxChars);
}
function appendAcceptanceCriteriaToDescription(description: string | null | undefined, acceptanceCriteria: string[] | undefined) {
const criteria = (acceptanceCriteria ?? []).map((item) => item.trim()).filter(Boolean);
if (criteria.length === 0) return description ?? null;
@ -275,7 +288,6 @@ async function listUnresolvedBlockerIssueIds(
)
.then((rows) => rows.map((row) => row.id));
}
async function getProjectDefaultGoalId(
db: ProjectGoalReader,
companyId: string,
@ -681,7 +693,13 @@ const issueListSelect = {
description: sql<string | null>`
CASE
WHEN ${issues.description} IS NULL THEN NULL
ELSE substring(${issues.description} FROM 1 FOR ${ISSUE_LIST_DESCRIPTION_MAX_CHARS})
ELSE encode(
substring(
convert_to(${issues.description}, current_setting('server_encoding'))
FROM 1 FOR ${ISSUE_LIST_DESCRIPTION_MAX_BYTES}
),
'base64'
)
END
`,
status: issues.status,
@ -699,6 +717,7 @@ const issueListSelect = {
originKind: issues.originKind,
originId: issues.originId,
originRunId: issues.originRunId,
originFingerprint: issues.originFingerprint,
requestDepth: issues.requestDepth,
billingCode: issues.billingCode,
assigneeAdapterOverrides: issues.assigneeAdapterOverrides,
@ -1275,6 +1294,12 @@ export function issueService(db: Db) {
conditions.push(unreadForUserCondition(companyId, unreadForUserId));
}
if (filters?.projectId) conditions.push(eq(issues.projectId, filters.projectId));
if (filters?.workspaceId) {
conditions.push(or(
eq(issues.executionWorkspaceId, filters.workspaceId),
eq(issues.projectWorkspaceId, filters.workspaceId),
)!);
}
if (filters?.executionWorkspaceId) {
conditions.push(eq(issues.executionWorkspaceId, filters.executionWorkspaceId));
}
@ -1327,7 +1352,10 @@ export function issueService(db: Db) {
desc(canonicalLastActivityAt),
desc(issues.updatedAt),
);
const rows = limit === undefined ? await baseQuery : await baseQuery.limit(limit);
const rows = (limit === undefined ? await baseQuery : await baseQuery.limit(limit)).map((row) => ({
...row,
description: decodeDatabaseTextPreview(row.description, ISSUE_LIST_DESCRIPTION_MAX_CHARS),
}));
const withLabels = await withIssueLabels(db, rows);
const runMap = await activeRunMapForIssues(db, withLabels);
const withRuns = withActiveRuns(withLabels, runMap);

View file

@ -47,7 +47,7 @@ import { queueIssueAssignmentWakeup, type IssueAssignmentWakeupDeps } from "./is
import { logActivity } from "./activity-log.js";
const OPEN_ISSUE_STATUSES = ["backlog", "todo", "in_progress", "in_review", "blocked"];
const LIVE_HEARTBEAT_RUN_STATUSES = ["queued", "running"];
const LIVE_HEARTBEAT_RUN_STATUSES = ["queued", "running", "scheduled_retry"];
const TERMINAL_ISSUE_STATUSES = new Set(["done", "cancelled"]);
const MAX_CATCH_UP_RUNS = 25;
const WEEKDAY_INDEX: Record<string, number> = {
@ -320,6 +320,37 @@ function mergeRoutineRunPayload(
};
}
/**
 * Recursively converts a value into a canonical, JSON-friendly form so that
 * structurally equal inputs serialize identically.
 *
 * - `undefined` and `null` become `null`.
 * - Strings, numbers and booleans are returned as-is.
 * - `Date` instances become their ISO-8601 string.
 * - Arrays are normalized element by element, order preserved.
 * - Plain records are rebuilt with their keys sorted and values normalized.
 * - Anything else is converted with `String(value)`.
 *
 * @param value - Arbitrary value to normalize.
 * @returns The canonical representation of `value`.
 */
function normalizeRoutineDispatchFingerprintValue(value: unknown): unknown {
  if (value === undefined || value === null) {
    return null;
  }

  switch (typeof value) {
    case "string":
    case "number":
    case "boolean":
      return value;
    default:
      break;
  }

  if (value instanceof Date) {
    return value.toISOString();
  }

  if (Array.isArray(value)) {
    return value.map((element) => normalizeRoutineDispatchFingerprintValue(element));
  }

  if (isPlainRecord(value)) {
    // Sorting keys makes the result independent of property insertion order.
    const sortedKeys = Object.keys(value).sort();
    const normalizedEntries = sortedKeys.map(
      (key) => [key, normalizeRoutineDispatchFingerprintValue(value[key])],
    );
    return Object.fromEntries(normalizedEntries);
  }

  return String(value);
}
/**
 * Computes a fingerprint for a routine dispatch from the fields that
 * describe what would be executed.
 *
 * The input is normalized with `normalizeRoutineDispatchFingerprintValue`,
 * serialized with `JSON.stringify`, and hashed with SHA-256.
 *
 * @param input - Dispatch attributes that feed the fingerprint.
 * @returns The hex-encoded SHA-256 digest of the canonical JSON form.
 */
function createRoutineDispatchFingerprint(input: {
  payload: Record<string, unknown> | null;
  projectId: string | null;
  assigneeAgentId: string | null;
  executionWorkspaceId?: string | null;
  executionWorkspacePreference?: string | null;
  executionWorkspaceSettings?: Record<string, unknown> | null;
  title: string;
  description: string | null;
}) {
  const normalizedInput = normalizeRoutineDispatchFingerprintValue(input);
  const serialized = JSON.stringify(normalizedInput);
  const hasher = crypto.createHash("sha256");
  hasher.update(serialized);
  return hasher.digest("hex");
}
function routineUsesWorkspaceBranch(routine: typeof routines.$inferSelect) {
return (routine.variables ?? []).some((variable) => variable.name === WORKSPACE_BRANCH_ROUTINE_VARIABLE)
|| extractRoutineVariableNames([routine.title, routine.description]).includes(WORKSPACE_BRANCH_ROUTINE_VARIABLE);
@ -426,6 +457,7 @@ export function routineService(db: Db, deps: { heartbeat?: IssueAssignmentWakeup
triggeredAt: routineRuns.triggeredAt,
idempotencyKey: routineRuns.idempotencyKey,
triggerPayload: routineRuns.triggerPayload,
dispatchFingerprint: routineRuns.dispatchFingerprint,
linkedIssueId: routineRuns.linkedIssueId,
coalescedIntoRunId: routineRuns.coalescedIntoRunId,
failureReason: routineRuns.failureReason,
@ -458,6 +490,7 @@ export function routineService(db: Db, deps: { heartbeat?: IssueAssignmentWakeup
triggeredAt: row.triggeredAt,
idempotencyKey: row.idempotencyKey,
triggerPayload: row.triggerPayload as Record<string, unknown> | null,
dispatchFingerprint: row.dispatchFingerprint,
linkedIssueId: row.linkedIssueId,
coalescedIntoRunId: row.coalescedIntoRunId,
failureReason: row.failureReason,
@ -606,7 +639,22 @@ export function routineService(db: Db, deps: { heartbeat?: IssueAssignmentWakeup
}
}
async function findLiveExecutionIssue(routine: typeof routines.$inferSelect, executor: Db = db) {
/**
 * Builds the optional fingerprint filter used when looking for a live
 * routine execution issue.
 *
 * @param dispatchFingerprint - Fingerprint of the dispatch being evaluated.
 * @returns `null` when no fingerprint is given (nullish or empty), otherwise
 *   a condition matching issues whose `originFingerprint` equals the given
 *   fingerprint or the literal `"default"`.
 */
function routineExecutionFingerprintCondition(dispatchFingerprint?: string | null) {
  if (dispatchFingerprint == null || dispatchFingerprint === "") {
    return null;
  }
  const matchesThisDispatch = eq(issues.originFingerprint, dispatchFingerprint);
  // Matching "default" preserves coalescing against pre-migration open issues.
  // This arm becomes inert once those legacy routine execution issues drain out.
  const matchesLegacyDefault = eq(issues.originFingerprint, "default");
  return or(matchesThisDispatch, matchesLegacyDefault);
}
async function findLiveExecutionIssue(
routine: typeof routines.$inferSelect,
executor: Db = db,
dispatchFingerprint?: string | null,
) {
const fingerprintCondition = routineExecutionFingerprintCondition(dispatchFingerprint);
const executionBoundIssue = await executor
.select()
.from(issues)
@ -624,6 +672,7 @@ export function routineService(db: Db, deps: { heartbeat?: IssueAssignmentWakeup
eq(issues.originId, routine.id),
inArray(issues.status, OPEN_ISSUE_STATUSES),
isNull(issues.hiddenAt),
...(fingerprintCondition ? [fingerprintCondition] : []),
),
)
.orderBy(desc(issues.updatedAt), desc(issues.createdAt))
@ -649,6 +698,7 @@ export function routineService(db: Db, deps: { heartbeat?: IssueAssignmentWakeup
eq(issues.originId, routine.id),
inArray(issues.status, OPEN_ISSUE_STATUSES),
isNull(issues.hiddenAt),
...(fingerprintCondition ? [fingerprintCondition] : []),
),
)
.orderBy(desc(issues.updatedAt), desc(issues.createdAt))
@ -745,6 +795,16 @@ export function routineService(db: Db, deps: { heartbeat?: IssueAssignmentWakeup
const title = interpolateRoutineTemplate(input.routine.title, allVariables) ?? input.routine.title;
const description = interpolateRoutineTemplate(input.routine.description, allVariables);
const triggerPayload = mergeRoutineRunPayload(input.payload, { ...automaticVariables, ...resolvedVariables });
const dispatchFingerprint = createRoutineDispatchFingerprint({
payload: triggerPayload,
projectId,
assigneeAgentId,
executionWorkspaceId: input.executionWorkspaceId ?? null,
executionWorkspacePreference: input.executionWorkspacePreference ?? null,
executionWorkspaceSettings: input.executionWorkspaceSettings ?? null,
title,
description,
});
const run = await db.transaction(async (tx) => {
const txDb = tx as unknown as Db;
await tx.execute(
@ -782,6 +842,7 @@ export function routineService(db: Db, deps: { heartbeat?: IssueAssignmentWakeup
triggeredAt,
idempotencyKey: input.idempotencyKey ?? null,
triggerPayload,
dispatchFingerprint,
})
.returning();
@ -791,7 +852,7 @@ export function routineService(db: Db, deps: { heartbeat?: IssueAssignmentWakeup
let createdIssue: Awaited<ReturnType<typeof issueSvc.create>> | null = null;
try {
const activeIssue = await findLiveExecutionIssue(input.routine, txDb);
const activeIssue = await findLiveExecutionIssue(input.routine, txDb, dispatchFingerprint);
if (activeIssue && input.routine.concurrencyPolicy !== "always_enqueue") {
const status = input.routine.concurrencyPolicy === "skip_if_active" ? "skipped" : "coalesced";
const updated = await finalizeRun(createdRun.id, {
@ -824,6 +885,7 @@ export function routineService(db: Db, deps: { heartbeat?: IssueAssignmentWakeup
originKind: "routine_execution",
originId: input.routine.id,
originRunId: createdRun.id,
originFingerprint: dispatchFingerprint,
executionWorkspaceId: input.executionWorkspaceId ?? null,
executionWorkspacePreference: input.executionWorkspacePreference ?? null,
executionWorkspaceSettings: input.executionWorkspaceSettings ?? null,
@ -840,7 +902,7 @@ export function routineService(db: Db, deps: { heartbeat?: IssueAssignmentWakeup
throw error;
}
const existingIssue = await findLiveExecutionIssue(input.routine, txDb);
const existingIssue = await findLiveExecutionIssue(input.routine, txDb, dispatchFingerprint);
if (!existingIssue) throw error;
const status = input.routine.concurrencyPolicy === "skip_if_active" ? "skipped" : "coalesced";
const updated = await finalizeRun(createdRun.id, {
@ -994,6 +1056,7 @@ export function routineService(db: Db, deps: { heartbeat?: IssueAssignmentWakeup
triggeredAt: routineRuns.triggeredAt,
idempotencyKey: routineRuns.idempotencyKey,
triggerPayload: routineRuns.triggerPayload,
dispatchFingerprint: routineRuns.dispatchFingerprint,
linkedIssueId: routineRuns.linkedIssueId,
coalescedIntoRunId: routineRuns.coalescedIntoRunId,
failureReason: routineRuns.failureReason,
@ -1025,6 +1088,7 @@ export function routineService(db: Db, deps: { heartbeat?: IssueAssignmentWakeup
triggeredAt: run.triggeredAt,
idempotencyKey: run.idempotencyKey,
triggerPayload: run.triggerPayload as Record<string, unknown> | null,
dispatchFingerprint: run.dispatchFingerprint,
linkedIssueId: run.linkedIssueId,
coalescedIntoRunId: run.coalescedIntoRunId,
failureReason: run.failureReason,
@ -1437,6 +1501,7 @@ export function routineService(db: Db, deps: { heartbeat?: IssueAssignmentWakeup
triggeredAt: routineRuns.triggeredAt,
idempotencyKey: routineRuns.idempotencyKey,
triggerPayload: routineRuns.triggerPayload,
dispatchFingerprint: routineRuns.dispatchFingerprint,
linkedIssueId: routineRuns.linkedIssueId,
coalescedIntoRunId: routineRuns.coalescedIntoRunId,
failureReason: routineRuns.failureReason,
@ -1468,6 +1533,7 @@ export function routineService(db: Db, deps: { heartbeat?: IssueAssignmentWakeup
triggeredAt: row.triggeredAt,
idempotencyKey: row.idempotencyKey,
triggerPayload: row.triggerPayload as Record<string, unknown> | null,
dispatchFingerprint: row.dispatchFingerprint,
linkedIssueId: row.linkedIssueId,
coalescedIntoRunId: row.coalescedIntoRunId,
failureReason: row.failureReason,