[codex] Add issue monitor liveness controls (#4988)

## Thinking Path

> - Paperclip is a control plane for autonomous AI companies where work
must stay observable, governable, and recoverable.
> - The task/heartbeat subsystem owns agent execution continuity, issue
state transitions, and visible recovery behavior.
> - Waiting on an external service is not the same as being blocked when
the assignee still owns a future check.
> - The gap was that agents had no first-class one-shot monitor state
for external-service waits, so recovery could look stalled or require ad
hoc comments.
> - This pull request adds bounded issue monitors that can wake the
owner, clear exhausted waits, and produce explicit recovery behavior.
> - It also surfaces monitor status in the board UI and documents when
to use monitors versus `blocked`.
> - The benefit is clearer liveness semantics for asynchronous waits
without weakening single-assignee task ownership.

## What Changed

- Added issue monitor fields, shared types, validators, constants, and
an idempotent `0075` migration for scheduled monitor state.
- Added server-side monitor scheduling, dispatch, recovery bounds,
activity logging, and external-ref redaction.
- Added board/agent route coverage for monitor permissions and child
monitor scheduling.
- Added issue detail/property UI for monitor state, a monitor activity
card, and Storybook stories for review surfaces.
- Documented monitor semantics and recovery policy behavior in
`doc/execution-semantics.md`.
- Addressed Greptile review feedback by preserving monitor state in
skipped-stage builders and making board monitor saves send `scheduledBy:
"board"`.

## Verification

- `pnpm install --frozen-lockfile`
- `pnpm run preflight:workspace-links && pnpm exec vitest run
server/src/__tests__/issue-execution-policy-routes.test.ts
server/src/__tests__/issue-execution-policy.test.ts
server/src/__tests__/issue-monitor-scheduler.test.ts
server/src/__tests__/recovery-classifiers.test.ts
ui/src/components/IssueMonitorActivityCard.test.tsx
ui/src/components/IssueProperties.test.tsx
ui/src/lib/activity-format.test.ts`
- First run passed 5 files and failed to collect 2 server suites because
the worktree was missing the optional `acpx/runtime` dependency.
- After `pnpm install --frozen-lockfile`, reran the 2 failed suites
successfully.
- `pnpm exec vitest run
server/src/__tests__/issue-monitor-scheduler.test.ts
server/src/__tests__/recovery-classifiers.test.ts`
- `pnpm --filter @paperclipai/shared typecheck && pnpm --filter
@paperclipai/db typecheck && pnpm --filter @paperclipai/server typecheck
&& pnpm --filter @paperclipai/ui typecheck`
- `pnpm exec vitest run
server/src/__tests__/issue-execution-policy.test.ts
ui/src/components/IssueProperties.test.tsx`
- `pnpm --filter @paperclipai/server typecheck && pnpm --filter
@paperclipai/ui typecheck`
- `pnpm exec vitest run
ui/src/components/IssueMonitorActivityCard.test.tsx
ui/src/components/IssueProperties.test.tsx`
- `pnpm --filter @paperclipai/ui typecheck`
- Storybook screenshot captured from
`http://127.0.0.1:6006/iframe.html?viewMode=story&id=product-issue-monitor-surfaces--monitor-surfaces`
with Playwright.

## Screenshots

![Issue monitor Storybook
surfaces](https://raw.githubusercontent.com/paperclipai/paperclip/PAP-2945-when-a-task-is-waiting-for-an-_external-service_-what-state-should-it-be-in-and-what-recovery-method-could-it-h/docs/pr-screenshots/pap-2945/monitor-surfaces.png)

## Risks

- Medium: this changes heartbeat recovery behavior for scheduled
external-service waits, so regressions could affect wake timing or
recovery issue creation.
- Migration risk is reduced by using `IF NOT EXISTS` for the new issue
monitor columns and index.
- External monitor references are treated as secret-adjacent and are
intentionally omitted from visible activity/wake payloads.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, GPT-5 coding agent with repository tool use and terminal
execution.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots or Storybook review surfaces
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Dotta 2026-05-03 08:58:53 -05:00 committed by GitHub
parent 76f09c8eb6
commit 57229d0f24
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
32 changed files with 19324 additions and 20 deletions

View file

@ -3,7 +3,7 @@ import path from "node:path";
import { execFile as execFileCallback } from "node:child_process";
import { promisify } from "node:util";
import { randomUUID } from "node:crypto";
import { and, asc, desc, eq, getTableColumns, gt, inArray, isNull, lte, notInArray, or, sql } from "drizzle-orm";
import { and, asc, desc, eq, getTableColumns, gt, inArray, isNull, lt, lte, notInArray, or, sql } from "drizzle-orm";
import type { Db } from "@paperclipai/db";
import {
AGENT_DEFAULT_MAX_CONCURRENT_RUNS,
@ -14,6 +14,9 @@ import {
type EnvironmentLeaseStatus,
type ExecutionWorkspace,
type ExecutionWorkspaceConfig,
type IssueExecutionMonitorClearReason,
type IssueExecutionMonitorPolicy,
type IssueExecutionMonitorRecoveryPolicy,
type ModelProfileKey,
type RunLivenessState,
} from "@paperclipai/shared";
@ -85,7 +88,12 @@ import {
sanitizeRuntimeServiceBaseEnv,
} from "./workspace-runtime.js";
import { issueService } from "./issues.js";
import { parseIssueExecutionState } from "./issue-execution-policy.js";
import {
buildIssueMonitorClearedPatch,
buildIssueMonitorTriggeredPatch,
normalizeIssueExecutionPolicy,
parseIssueExecutionState,
} from "./issue-execution-policy.js";
import {
ISSUE_TREE_CONTROL_INTERACTION_WAKE_REASONS,
isVerifiedIssueTreeControlInteractionWake,
@ -2328,6 +2336,689 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
.then((rows) => rows[0] ?? null);
}
// Column projection used by the issue-monitor queries in this service: both the
// manual trigger lookup and the due-monitor scan pass this object to `select(...)`.
// Its keys line up one-to-one with the fields of IssueMonitorDispatchRow.
const issueMonitorDispatchColumns = {
id: issues.id,
companyId: issues.companyId,
projectId: issues.projectId,
goalId: issues.goalId,
identifier: issues.identifier,
title: issues.title,
status: issues.status,
priority: issues.priority,
assigneeAgentId: issues.assigneeAgentId,
assigneeUserId: issues.assigneeUserId,
billingCode: issues.billingCode,
// Raw execution policy/state payloads; the policy is normalized before use.
executionPolicy: issues.executionPolicy,
executionState: issues.executionState,
// Monitor scheduling and bookkeeping columns.
monitorNextCheckAt: issues.monitorNextCheckAt,
monitorWakeRequestedAt: issues.monitorWakeRequestedAt,
monitorLastTriggeredAt: issues.monitorLastTriggeredAt,
monitorAttemptCount: issues.monitorAttemptCount,
monitorNotes: issues.monitorNotes,
monitorScheduledBy: issues.monitorScheduledBy,
};
/**
 * Issue fields consumed by the monitor dispatch helpers.
 *
 * Field names mirror `issueMonitorDispatchColumns`. Rows returned by the claim
 * `update(...).returning()` calls are cast to this shape before dispatch.
 */
interface IssueMonitorDispatchRow {
id: string;
companyId: string;
projectId: string | null;
goalId: string | null;
identifier: string | null;
title: string;
status: string;
priority: string;
/** Agent that is woken for monitor checks; dispatch refuses to run when this is null. */
assigneeAgentId: string | null;
assigneeUserId: string | null;
billingCode: string | null;
/** Raw policy payload; passed through `normalizeIssueExecutionPolicy` before the monitor settings are read. */
executionPolicy: Record<string, unknown> | null;
executionState: Record<string, unknown> | null;
/** Scheduled time of the next check; dispatch refuses to run when this is null. */
monitorNextCheckAt: Date | null;
/** Claim marker: set to the dispatch time when a monitor is claimed and reset to null when a failed dispatch releases it. */
monitorWakeRequestedAt: Date | null;
monitorLastTriggeredAt: Date | null;
/** Attempts recorded so far; null is treated as 0 when the next attempt number is computed. */
monitorAttemptCount: number | null;
monitorNotes: string | null;
monitorScheduledBy: string | null;
}
/**
 * Parses a monitor timestamp string into a `Date`.
 *
 * @param value Timestamp text; may be absent or empty.
 * @returns The parsed date, or `null` when the value is missing, empty, or not
 *   something `Date` can parse.
 */
function parseMonitorDate(value: string | null | undefined) {
  if (value === null || value === undefined || value === "") {
    return null;
  }
  const parsed = new Date(value);
  if (Number.isNaN(parsed.getTime())) {
    return null;
  }
  return parsed;
}
/**
 * Decides whether a monitor has run out of budget before its next attempt.
 *
 * The timeout is checked first: once `now` is at or past a parseable
 * `timeoutAt`, the result is `"timeout_exceeded"`. Otherwise, when
 * `maxAttempts` is set and the upcoming attempt number is greater than it, the
 * result is `"max_attempts_exhausted"`.
 *
 * @returns The reason the monitor should be cleared, or `null` if it may still run.
 */
function issueMonitorLimitClearReason(input: {
  monitor: IssueExecutionMonitorPolicy | null;
  nextAttemptCount: number;
  now: Date;
}): IssueExecutionMonitorClearReason | null {
  const { monitor, nextAttemptCount, now } = input;

  // A missing or unparseable timeout never expires the monitor.
  const deadline = parseMonitorDate(monitor?.timeoutAt ?? null);
  const deadlineReached = deadline !== null && now.getTime() >= deadline.getTime();
  if (deadlineReached) {
    return "timeout_exceeded";
  }

  const attemptCap = monitor?.maxAttempts ?? null;
  const capExceeded = attemptCap !== null && nextAttemptCount > attemptCap;
  return capExceeded ? "max_attempts_exhausted" : null;
}
/**
 * Resolves the recovery policy for a monitor, falling back to `"wake_owner"`
 * when there is no monitor or it does not specify one.
 */
function monitorRecoveryPolicy(
  monitor: IssueExecutionMonitorPolicy | null,
): IssueExecutionMonitorRecoveryPolicy {
  const configured = monitor?.recoveryPolicy;
  if (configured === null || configured === undefined) {
    return "wake_owner";
  }
  return configured;
}
/**
 * Builds the `details` payload recorded with monitor exhaustion/recovery
 * activity entries.
 *
 * Monitor-derived fields (`serviceName`, `timeoutAt`, `maxAttempts`) and the
 * issue's notes are normalized to `null` when absent.
 */
function monitorRecoveryDetails(input: {
  claimed: IssueMonitorDispatchRow;
  scheduledAtIso: string;
  nextAttemptCount: number;
  clearReason: IssueExecutionMonitorClearReason;
  recoveryPolicy: IssueExecutionMonitorRecoveryPolicy;
  monitor: IssueExecutionMonitorPolicy | null;
  source: "manual" | "scheduled";
}) {
  const { claimed, monitor, clearReason, recoveryPolicy, source } = input;

  const notes = claimed.monitorNotes ?? null;
  const serviceName = monitor?.serviceName ?? null;
  const timeoutAt = monitor?.timeoutAt ?? null;
  const maxAttempts = monitor?.maxAttempts ?? null;

  return {
    identifier: claimed.identifier,
    nextCheckAt: input.scheduledAtIso,
    attemptedAttemptCount: input.nextAttemptCount,
    notes,
    serviceName,
    timeoutAt,
    maxAttempts,
    clearReason,
    recoveryPolicy,
    source,
  };
}
/**
 * Renders an issue identifier as a Markdown link when it has the
 * `PREFIX-123` shape.
 *
 * @param identifier Issue identifier, if the issue has one.
 * @param fallback Text to use when there is no identifier.
 * @returns `fallback` for a missing/empty identifier, the bare identifier when
 *   it does not match the expected shape, otherwise
 *   `[IDENT](/PREFIX/issues/IDENT)`.
 */
function formatIssueIdentifierLink(identifier: string | null, fallback: string) {
  if (identifier === null || identifier === "") {
    return fallback;
  }

  const hasIssueKeyShape = /^[A-Z][A-Z0-9]*-\d+$/.test(identifier);
  if (!hasIssueKeyShape) {
    return identifier;
  }

  // The link path is rooted at the part before the first dash.
  const [routePrefix] = identifier.split("-");
  if (!routePrefix) {
    return identifier;
  }
  return `[${identifier}](/${routePrefix}/issues/${identifier})`;
}
/**
 * Composes the Markdown text explaining why a scheduled external-service
 * monitor was cleared. The same text is used as the description of a recovery
 * issue and as the body of a board-escalation comment.
 *
 * The issue is referenced by its identifier link when available, otherwise by
 * its id.
 */
function monitorRecoveryComment(input: {
  issue: IssueMonitorDispatchRow;
  clearReason: IssueExecutionMonitorClearReason;
  recoveryPolicy: IssueExecutionMonitorRecoveryPolicy;
  nextAttemptCount: number;
}) {
  const { issue, clearReason, recoveryPolicy, nextAttemptCount } = input;
  const issueLabel = formatIssueIdentifierLink(issue.identifier, issue.id);

  // Any reason other than a timeout is worded as attempt exhaustion.
  let why: string;
  if (clearReason === "timeout_exceeded") {
    why = "its timeout was reached";
  } else {
    why = "its maximum attempt count was reached";
  }

  const lines = [
    `Paperclip cleared the scheduled external-service monitor for ${issueLabel} because ${why}.`,
    "",
    `- Attempt count: ${nextAttemptCount}`,
    `- Recovery policy: ${recoveryPolicy}`,
    "",
    "Next action: inspect the external service state, record the result on this issue, and restore an explicit execution or waiting path if more work remains.",
  ];
  return lines.join("\n");
}
/**
 * Looks up the most recently created, still-open recovery issue that was
 * spawned for the given issue.
 *
 * A match must belong to the same company, carry the stranded-issue-recovery
 * origin kind with `originId` equal to the claimed issue's id, not be hidden,
 * and not be in `done` or `cancelled` status.
 *
 * @returns The newest matching issue row, or `null` when none exists.
 */
async function findOpenIssueMonitorRecoveryIssue(claimed: IssueMonitorDispatchRow) {
  const isOpenRecoveryForClaimed = and(
    eq(issues.companyId, claimed.companyId),
    eq(issues.originKind, RECOVERY_ORIGIN_KINDS.strandedIssueRecovery),
    eq(issues.originId, claimed.id),
    isNull(issues.hiddenAt),
    notInArray(issues.status, ["done", "cancelled"]),
  );

  const matches = await db
    .select()
    .from(issues)
    .where(isOpenRecoveryForClaimed)
    .orderBy(desc(issues.createdAt))
    .limit(1);

  return matches[0] ?? null;
}
/**
 * Runs the recovery action for a monitor that was cleared because it hit its
 * timeout or attempt limit.
 *
 * Behaviour by `recoveryPolicy`:
 * - `"create_recovery_issue"`: reuses the newest open recovery issue whose
 *   origin is the claimed issue, or creates one as a child of it; wakes that
 *   recovery issue's agent assignee when it has one; logs
 *   `issue.monitor_recovery_issue_created`.
 * - `"escalate_to_board"`: inserts an explanatory comment on the claimed issue
 *   and logs `issue.monitor_escalated_to_board`.
 * - any other policy: wakes the claimed issue's agent assignee and logs
 *   `issue.monitor_recovery_wake_queued`.
 *
 * @throws A conflict error when the owner has to be woken but the claimed
 *   issue has no agent assignee. Errors from the wake queue, database and
 *   activity log propagate unchanged.
 */
async function performIssueMonitorRecovery(input: {
  claimed: IssueMonitorDispatchRow;
  scheduledAtIso: string;
  nextAttemptCount: number;
  clearReason: IssueExecutionMonitorClearReason;
  recoveryPolicy: IssueExecutionMonitorRecoveryPolicy;
  monitor: IssueExecutionMonitorPolicy | null;
  actorType: "user" | "agent" | "system";
  actorId: string;
  agentId: string | null;
  runId: string | null;
  activitySource: "manual" | "scheduled";
}) {
  // Shared activity payload for whichever branch runs below.
  const details = monitorRecoveryDetails({
    claimed: input.claimed,
    scheduledAtIso: input.scheduledAtIso,
    nextAttemptCount: input.nextAttemptCount,
    clearReason: input.clearReason,
    recoveryPolicy: input.recoveryPolicy,
    monitor: input.monitor,
    source: input.activitySource,
  });

  if (input.recoveryPolicy === "create_recovery_issue") {
    // Reuse an existing open recovery issue so repeated exhaustion does not pile up duplicates.
    let recoveryIssue = await findOpenIssueMonitorRecoveryIssue(input.claimed);
    if (!recoveryIssue) {
      recoveryIssue = await issuesSvc.create(input.claimed.companyId, {
        title: `Recover external-service monitor for ${input.claimed.identifier ?? input.claimed.title}`,
        description: monitorRecoveryComment({
          issue: input.claimed,
          clearReason: input.clearReason,
          recoveryPolicy: input.recoveryPolicy,
          nextAttemptCount: input.nextAttemptCount,
        }),
        status: "todo",
        priority: "high",
        parentId: input.claimed.id,
        projectId: input.claimed.projectId,
        goalId: input.claimed.goalId,
        assigneeAgentId: input.claimed.assigneeAgentId,
        originKind: RECOVERY_ORIGIN_KINDS.strandedIssueRecovery,
        originId: input.claimed.id,
        originFingerprint: `issue_monitor:${input.clearReason}`,
        billingCode: input.claimed.billingCode,
      });
    }
    if (recoveryIssue.assigneeAgentId) {
      await enqueueWakeup(recoveryIssue.assigneeAgentId, {
        source: "automation",
        triggerDetail: "system",
        reason: "issue_monitor_recovery_issue",
        idempotencyKey: `issue-monitor-recovery-issue:${input.claimed.id}:${input.clearReason}:${input.scheduledAtIso}`,
        payload: { issueId: recoveryIssue.id, sourceIssueId: input.claimed.id },
        requestedByActorType: input.actorType,
        requestedByActorId: input.actorId,
        contextSnapshot: {
          issueId: recoveryIssue.id,
          sourceIssueId: input.claimed.id,
          source: "issue.monitor.recovery_issue",
          wakeReason: "issue_monitor_recovery_issue",
        },
      });
    }
    await logActivity(db, {
      companyId: input.claimed.companyId,
      actorType: input.actorType,
      actorId: input.actorId,
      agentId: input.agentId,
      runId: input.runId,
      action: "issue.monitor_recovery_issue_created",
      entityType: "issue",
      entityId: input.claimed.id,
      details: {
        ...details,
        recoveryIssueId: recoveryIssue.id,
        recoveryIdentifier: recoveryIssue.identifier,
      },
    });
    return;
  }

  if (input.recoveryPolicy === "escalate_to_board") {
    await db.insert(issueComments).values({
      companyId: input.claimed.companyId,
      issueId: input.claimed.id,
      body: monitorRecoveryComment({
        issue: input.claimed,
        clearReason: input.clearReason,
        recoveryPolicy: input.recoveryPolicy,
        nextAttemptCount: input.nextAttemptCount,
      }),
    });
    await logActivity(db, {
      companyId: input.claimed.companyId,
      actorType: input.actorType,
      actorId: input.actorId,
      agentId: input.agentId,
      runId: input.runId,
      action: "issue.monitor_escalated_to_board",
      entityType: "issue",
      entityId: input.claimed.id,
      details,
    });
    return;
  }

  // Remaining policy: wake the owning agent. The row type allows a null
  // assignee, so check it explicitly instead of asserting it away.
  const ownerAgentId = input.claimed.assigneeAgentId;
  if (!ownerAgentId) {
    throw conflict("Issue monitor requires an agent assignee");
  }
  await enqueueWakeup(ownerAgentId, {
    source: "automation",
    triggerDetail: "system",
    reason: "issue_monitor_recovery",
    idempotencyKey: `issue-monitor-recovery:${input.claimed.id}:${input.clearReason}:${input.scheduledAtIso}`,
    payload: {
      issueId: input.claimed.id,
      monitorAttemptCount: input.nextAttemptCount,
      monitorNotes: input.claimed.monitorNotes ?? null,
      clearReason: input.clearReason,
      serviceName: input.monitor?.serviceName ?? null,
      timeoutAt: input.monitor?.timeoutAt ?? null,
      maxAttempts: input.monitor?.maxAttempts ?? null,
    },
    requestedByActorType: input.actorType,
    requestedByActorId: input.actorId,
    contextSnapshot: {
      issueId: input.claimed.id,
      source: "issue.monitor.recovery",
      wakeReason: "issue_monitor_recovery",
      monitorAttemptCount: input.nextAttemptCount,
      monitorNotes: input.claimed.monitorNotes ?? null,
      clearReason: input.clearReason,
      serviceName: input.monitor?.serviceName ?? null,
      timeoutAt: input.monitor?.timeoutAt ?? null,
      maxAttempts: input.monitor?.maxAttempts ?? null,
    },
  });
  await logActivity(db, {
    companyId: input.claimed.companyId,
    actorType: input.actorType,
    actorId: input.actorId,
    agentId: input.agentId,
    runId: input.runId,
    action: "issue.monitor_recovery_wake_queued",
    entityType: "issue",
    entityId: input.claimed.id,
    details,
  });
}
/**
 * Clears an exhausted monitor and hands off to the configured recovery action.
 *
 * Steps, in order:
 * 1. apply the patch from `buildIssueMonitorClearedPatch` to the issue row,
 * 2. log an `issue.monitor_exhausted` activity entry,
 * 3. run `performIssueMonitorRecovery` with the same context.
 *
 * @returns `{ outcome: "skipped", reason }` where `reason` is the clear reason.
 */
async function clearIssueMonitorAndRecover(input: {
  claimed: IssueMonitorDispatchRow;
  policy: ReturnType<typeof normalizeIssueExecutionPolicy>;
  scheduledAtIso: string;
  nextAttemptCount: number;
  clearReason: IssueExecutionMonitorClearReason;
  recoveryPolicy: IssueExecutionMonitorRecoveryPolicy;
  monitor: IssueExecutionMonitorPolicy | null;
  now: Date;
  actorType: "user" | "agent" | "system";
  actorId: string;
  agentId: string | null;
  runId: string | null;
  activitySource: "manual" | "scheduled";
}) {
  const { claimed, clearReason, now } = input;

  // Context shared by the activity details and the recovery step.
  const recoveryContext = {
    claimed,
    scheduledAtIso: input.scheduledAtIso,
    nextAttemptCount: input.nextAttemptCount,
    clearReason,
    recoveryPolicy: input.recoveryPolicy,
    monitor: input.monitor,
  };

  const clearedPatch = buildIssueMonitorClearedPatch({
    issue: claimed,
    policy: input.policy,
    clearReason,
    clearedAt: now,
  });
  await db
    .update(issues)
    .set({ ...clearedPatch, updatedAt: now })
    .where(eq(issues.id, claimed.id));

  await logActivity(db, {
    companyId: claimed.companyId,
    actorType: input.actorType,
    actorId: input.actorId,
    agentId: input.agentId,
    runId: input.runId,
    action: "issue.monitor_exhausted",
    entityType: "issue",
    entityId: claimed.id,
    details: monitorRecoveryDetails({ ...recoveryContext, source: input.activitySource }),
  });

  await performIssueMonitorRecovery({
    ...recoveryContext,
    actorType: input.actorType,
    actorId: input.actorId,
    agentId: input.agentId,
    runId: input.runId,
    activitySource: input.activitySource,
  });

  return { outcome: "skipped" as const, reason: clearReason };
}
/**
 * Dispatches one monitor check for an issue row the caller has already claimed
 * (both callers set `monitorWakeRequestedAt` before calling this).
 *
 * Flow:
 * - If the monitor has hit its timeout or attempt limit, it is cleared and the
 *   recovery policy runs instead of a normal wake.
 * - Otherwise the assignee agent is woken, the triggered patch is written to
 *   the issue, and `issue.monitor_triggered` is logged.
 * - If that fails with a 4xx `HttpError` and `clearOnClientError` is set, the
 *   monitor is cleared with reason `"dispatch_skipped"`, `issue.monitor_skipped`
 *   is logged, and the result is a skip.
 * - On any other failure the claim is released (`monitorWakeRequestedAt` reset
 *   to null) and the error is rethrown.
 *
 * @returns `{ outcome: "triggered" }` or `{ outcome: "skipped", reason }`.
 * @throws A conflict error when the row has no agent assignee or no scheduled
 *   check time; otherwise whatever the failed dispatch step threw.
 */
async function dispatchClaimedIssueMonitor(
  claimed: IssueMonitorDispatchRow,
  input: {
    now: Date;
    source: "automation" | "on_demand";
    triggerDetail: "manual" | "system";
    wakeReason: string;
    actorType: "user" | "agent" | "system";
    actorId: string;
    agentId: string | null;
    runId: string | null;
    clearOnClientError: boolean;
    activitySource: "manual" | "scheduled";
  },
) {
  const ownerAgentId = claimed.assigneeAgentId;
  const scheduledAt = claimed.monitorNextCheckAt;
  if (!ownerAgentId || !scheduledAt) {
    throw conflict("Issue monitor is not ready to dispatch");
  }

  const scheduledAtIso = scheduledAt.toISOString();
  const nextAttemptCount = (claimed.monitorAttemptCount ?? 0) + 1;
  const policy = normalizeIssueExecutionPolicy(claimed.executionPolicy ?? null);
  const monitor = policy?.monitor ?? null;
  const clearReason = issueMonitorLimitClearReason({ monitor, nextAttemptCount, now: input.now });
  const recoveryPolicy = monitorRecoveryPolicy(monitor);

  // Actor fields repeated on every activity entry written here.
  const activityActor = {
    companyId: claimed.companyId,
    actorType: input.actorType,
    actorId: input.actorId,
    agentId: input.agentId,
    runId: input.runId,
  };

  if (clearReason) {
    return clearIssueMonitorAndRecover({
      claimed,
      policy,
      scheduledAtIso,
      nextAttemptCount,
      clearReason,
      recoveryPolicy,
      monitor,
      now: input.now,
      actorType: input.actorType,
      actorId: input.actorId,
      agentId: input.agentId,
      runId: input.runId,
      activitySource: input.activitySource,
    });
  }

  const monitorMetadata = {
    serviceName: monitor?.serviceName ?? null,
    timeoutAt: monitor?.timeoutAt ?? null,
    maxAttempts: monitor?.maxAttempts ?? null,
    recoveryPolicy: monitor?.recoveryPolicy ?? null,
  };
  // Fields that appear in both the wake payload and the context snapshot.
  const wakeFields = {
    nextCheckAt: scheduledAtIso,
    monitorAttemptCount: nextAttemptCount,
    monitorNotes: claimed.monitorNotes ?? null,
    ...monitorMetadata,
  };
  const notes = claimed.monitorNotes ?? null;

  // Drops the claim marker so a later trigger or tick can pick the monitor up again.
  const releaseClaim = () =>
    db
      .update(issues)
      .set({
        monitorWakeRequestedAt: null,
        updatedAt: new Date(),
      })
      .where(eq(issues.id, claimed.id));

  try {
    await enqueueWakeup(ownerAgentId, {
      source: input.source,
      triggerDetail: input.triggerDetail,
      reason: input.wakeReason,
      idempotencyKey: `issue-monitor:${claimed.id}:${scheduledAtIso}`,
      payload: {
        issueId: claimed.id,
        ...wakeFields,
        source: input.activitySource,
      },
      requestedByActorType: input.actorType,
      requestedByActorId: input.actorId,
      contextSnapshot: {
        issueId: claimed.id,
        source: "issue.monitor",
        wakeReason: input.wakeReason,
        ...wakeFields,
        manualTrigger: input.activitySource === "manual",
      },
    });

    const triggeredPatch = buildIssueMonitorTriggeredPatch({
      issue: claimed,
      policy,
      triggeredAt: input.now,
    });
    await db
      .update(issues)
      .set({ ...triggeredPatch, updatedAt: new Date() })
      .where(eq(issues.id, claimed.id));

    await logActivity(db, {
      ...activityActor,
      action: "issue.monitor_triggered",
      entityType: "issue",
      entityId: claimed.id,
      details: {
        identifier: claimed.identifier,
        nextCheckAt: scheduledAtIso,
        lastTriggeredAt: input.now.toISOString(),
        attemptCount: nextAttemptCount,
        notes,
        ...monitorMetadata,
        source: input.activitySource,
      },
    });
    return { outcome: "triggered" as const };
  } catch (err) {
    // Only a client-side (4xx) rejection, and only for callers that opted in,
    // turns into a cleared monitor; everything else releases the claim and rethrows.
    if (
      err instanceof HttpError &&
      err.status >= 400 &&
      err.status < 500 &&
      input.clearOnClientError
    ) {
      const clearedPatch = buildIssueMonitorClearedPatch({
        issue: claimed,
        policy,
        clearReason: "dispatch_skipped",
        clearedAt: input.now,
      });
      await db
        .update(issues)
        .set({ ...clearedPatch, updatedAt: new Date() })
        .where(eq(issues.id, claimed.id));
      await logActivity(db, {
        ...activityActor,
        action: "issue.monitor_skipped",
        entityType: "issue",
        entityId: claimed.id,
        details: {
          identifier: claimed.identifier,
          nextCheckAt: scheduledAtIso,
          attemptCount: nextAttemptCount,
          notes,
          reason: err.message,
          source: input.activitySource,
        },
      });
      return { outcome: "skipped" as const, reason: err.message };
    }

    await releaseClaim();
    throw err;
  }
}
/**
 * Runs an issue's scheduled monitor check on demand.
 *
 * The issue must have a scheduled check, an agent assignee with no user
 * assignee, and be `in_progress` or `in_review`. The monitor is then claimed
 * with a conditional update that only succeeds when no claim is held or the
 * existing claim is older than five minutes, and dispatched as a manual,
 * on-demand trigger. Client errors do not clear the monitor on this path.
 *
 * @param issueId Issue whose monitor should run.
 * @param input Optional overrides: clock (`now`), acting identity, and wake
 *   reason (defaults to `"issue_monitor_due"`). A `system` actor without an
 *   explicit id is recorded as `"heartbeat_scheduler"`.
 * @returns The dispatch outcome.
 * @throws Not-found when the issue does not exist; conflict when no actor id
 *   can be determined, a precondition fails, or the claim cannot be taken.
 */
async function triggerIssueMonitor(issueId: string, input?: {
  now?: Date;
  actorType?: "user" | "agent" | "system";
  actorId?: string | null;
  agentId?: string | null;
  runId?: string | null;
  wakeReason?: string;
}) {
  const now = input?.now ?? new Date();
  const actorType = input?.actorType ?? "system";
  const fallbackActorId = actorType === "system" ? "heartbeat_scheduler" : null;
  const actorId = input?.actorId ?? fallbackActorId;
  if (!actorId) {
    throw conflict("Issue monitor trigger requires an actor");
  }

  const found = await db
    .select(issueMonitorDispatchColumns)
    .from(issues)
    .where(eq(issues.id, issueId))
    .limit(1);
  const issue = found[0] ?? null;

  // Precondition checks give the caller a specific reason before the atomic claim below.
  if (!issue) {
    throw notFound("Issue not found");
  }
  if (!issue.monitorNextCheckAt) {
    throw conflict("Issue has no scheduled monitor");
  }
  if (!issue.assigneeAgentId || issue.assigneeUserId) {
    throw conflict("Issue monitor requires an agent assignee");
  }
  if (!["in_progress", "in_review"].includes(issue.status)) {
    throw conflict("Issue monitor can only run while the issue is in progress or in review");
  }

  // A claim older than this is treated as abandoned and may be taken over.
  const staleClaimWindowMs = 5 * 60 * 1000;
  const staleClaimThreshold = new Date(now.getTime() - staleClaimWindowMs);

  const claimed = await db.transaction(async (tx) => {
    const claimable = and(
      eq(issues.id, issueId),
      sql`${issues.monitorNextCheckAt} is not null`,
      isNull(issues.assigneeUserId),
      sql`${issues.assigneeAgentId} is not null`,
      inArray(issues.status, ["in_progress", "in_review"]),
      or(
        isNull(issues.monitorWakeRequestedAt),
        lt(issues.monitorWakeRequestedAt, staleClaimThreshold),
      ),
    );
    const rows = await tx
      .update(issues)
      .set({
        monitorWakeRequestedAt: now,
        updatedAt: now,
      })
      .where(claimable)
      .returning();
    const [updated] = rows;
    return (updated ?? null) as IssueMonitorDispatchRow | null;
  });
  if (!claimed) {
    throw conflict("Issue monitor check is already in progress");
  }

  return dispatchClaimedIssueMonitor(claimed, {
    now,
    source: "on_demand",
    triggerDetail: "manual",
    wakeReason: input?.wakeReason ?? "issue_monitor_due",
    actorType,
    actorId,
    agentId: input?.agentId ?? null,
    runId: input?.runId ?? null,
    clearOnClientError: false,
    activitySource: "manual",
  });
}
/**
 * Scheduler pass over due issue monitors.
 *
 * Selects up to 50 issues whose next check time is at or before `now`, that
 * have an agent assignee and no user assignee, are `in_progress` or
 * `in_review`, and hold no claim newer than five minutes. Each one is
 * re-checked and claimed with a conditional update, then dispatched as a
 * scheduled system trigger. A dispatch failure is logged and does not stop the
 * remaining monitors.
 *
 * @param now Clock reading used for the due check, the claim marker and dispatch.
 * @returns Counts: `checked` (rows selected), `triggered`, and `skipped`.
 */
async function tickDueIssueMonitors(now = new Date()) {
  const staleClaimWindowMs = 5 * 60 * 1000;
  const staleClaimThreshold = new Date(now.getTime() - staleClaimWindowMs);

  // Same eligibility predicates for the scan and for the per-row claim, built fresh for each query.
  const dueAndClaimable = () => [
    sql`${issues.monitorNextCheckAt} is not null`,
    lte(issues.monitorNextCheckAt, now),
    isNull(issues.assigneeUserId),
    sql`${issues.assigneeAgentId} is not null`,
    inArray(issues.status, ["in_progress", "in_review"]),
    or(
      isNull(issues.monitorWakeRequestedAt),
      lt(issues.monitorWakeRequestedAt, staleClaimThreshold),
    ),
  ];

  const dueMonitors = await db
    .select(issueMonitorDispatchColumns)
    .from(issues)
    .where(and(...dueAndClaimable()))
    .orderBy(asc(issues.monitorNextCheckAt), asc(issues.updatedAt))
    .limit(50);

  const tally = { triggered: 0, skipped: 0 };

  for (const due of dueMonitors) {
    // The row may have changed since the scan, so the claim repeats every condition.
    const claimed = await db.transaction(async (tx) => {
      const rows = await tx
        .update(issues)
        .set({
          monitorWakeRequestedAt: now,
          updatedAt: now,
        })
        .where(and(eq(issues.id, due.id), ...dueAndClaimable()))
        .returning();
      const [updated] = rows;
      return (updated ?? null) as IssueMonitorDispatchRow | null;
    });
    if (!claimed) {
      continue;
    }

    try {
      const result = await dispatchClaimedIssueMonitor(claimed, {
        now,
        source: "automation",
        triggerDetail: "system",
        wakeReason: "issue_monitor_due",
        actorType: "system",
        actorId: "heartbeat_scheduler",
        agentId: null,
        runId: null,
        clearOnClientError: true,
        activitySource: "scheduled",
      });
      if (result.outcome === "triggered") {
        tally.triggered += 1;
      }
      if (result.outcome === "skipped") {
        tally.skipped += 1;
      }
    } catch (err) {
      logger.error({ err, issueId: claimed.id }, "issue monitor tick failed");
    }
  }

  return {
    checked: dueMonitors.length,
    triggered: tally.triggered,
    skipped: tally.skipped,
  };
}
async function getOldestRunForSession(agentId: string, sessionId: string) {
return db
.select({
@ -7735,6 +8426,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
}),
wakeup: enqueueWakeup,
triggerIssueMonitor,
reportRunActivity: clearDetachedRunWarning,
@ -7804,7 +8496,13 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
else skipped += 1;
}
return { checked, enqueued, skipped };
const issueMonitors = await tickDueIssueMonitors(now);
return {
checked: checked + issueMonitors.checked,
enqueued: enqueued + issueMonitors.triggered,
skipped: skipped + issueMonitors.skipped,
};
},
cancelRun: (runId: string) => cancelRunInternal(runId),