mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-16 19:00:38 +09:00
[codex] Add issue monitor liveness controls (#4988)
## Thinking Path

> - Paperclip is a control plane for autonomous AI companies where work must stay observable, governable, and recoverable.
> - The task/heartbeat subsystem owns agent execution continuity, issue state transitions, and visible recovery behavior.
> - Waiting on an external service is not the same as being blocked when the assignee still owns a future check.
> - The gap was that agents had no first-class one-shot monitor state for external-service waits, so recovery could look stalled or require ad hoc comments.
> - This pull request adds bounded issue monitors that can wake the owner, clear exhausted waits, and produce explicit recovery behavior.
> - It also surfaces monitor status in the board UI and documents when to use monitors versus `blocked`.
> - The benefit is clearer liveness semantics for asynchronous waits without weakening single-assignee task ownership.

## What Changed

- Added issue monitor fields, shared types, validators, constants, and an idempotent `0075` migration for scheduled monitor state.
- Added server-side monitor scheduling, dispatch, recovery bounds, activity logging, and external-ref redaction.
- Added board/agent route coverage for monitor permissions and child monitor scheduling.
- Added issue detail/property UI for monitor state, a monitor activity card, and Storybook stories for review surfaces.
- Documented monitor semantics and recovery policy behavior in `doc/execution-semantics.md`.
- Addressed Greptile review feedback by preserving monitor state in skipped-stage builders and making board monitor saves send `scheduledBy: "board"`.
## Verification

- `pnpm install --frozen-lockfile`
- `pnpm run preflight:workspace-links && pnpm exec vitest run server/src/__tests__/issue-execution-policy-routes.test.ts server/src/__tests__/issue-execution-policy.test.ts server/src/__tests__/issue-monitor-scheduler.test.ts server/src/__tests__/recovery-classifiers.test.ts ui/src/components/IssueMonitorActivityCard.test.tsx ui/src/components/IssueProperties.test.tsx ui/src/lib/activity-format.test.ts`
- First run passed 5 files and failed to collect 2 server suites because the worktree was missing the optional `acpx/runtime` dependency.
- After `pnpm install --frozen-lockfile`, reran the 2 failed suites successfully.
- `pnpm exec vitest run server/src/__tests__/issue-monitor-scheduler.test.ts server/src/__tests__/recovery-classifiers.test.ts`
- `pnpm --filter @paperclipai/shared typecheck && pnpm --filter @paperclipai/db typecheck && pnpm --filter @paperclipai/server typecheck && pnpm --filter @paperclipai/ui typecheck`
- `pnpm exec vitest run server/src/__tests__/issue-execution-policy.test.ts ui/src/components/IssueProperties.test.tsx`
- `pnpm --filter @paperclipai/server typecheck && pnpm --filter @paperclipai/ui typecheck`
- `pnpm exec vitest run ui/src/components/IssueMonitorActivityCard.test.tsx ui/src/components/IssueProperties.test.tsx`
- `pnpm --filter @paperclipai/ui typecheck`
- Storybook screenshot captured from `http://127.0.0.1:6006/iframe.html?viewMode=story&id=product-issue-monitor-surfaces--monitor-surfaces` with Playwright.

## Screenshots

## Risks

- Medium: this changes heartbeat recovery behavior for scheduled external-service waits, so regressions could affect wake timing or recovery issue creation.
- Migration risk is reduced by using `IF NOT EXISTS` for the new issue monitor columns and index.
- External monitor references are treated as secret-adjacent and are intentionally omitted from visible activity/wake payloads.
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, GPT-5 coding agent with repository tool use and terminal execution.

## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after screenshots or Storybook review surfaces
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
parent
76f09c8eb6
commit
57229d0f24
32 changed files with 19324 additions and 20 deletions
|
|
@ -81,6 +81,8 @@ import {
|
|||
applyIssueExecutionPolicyTransition,
|
||||
normalizeIssueExecutionPolicy,
|
||||
parseIssueExecutionState,
|
||||
redactIssueMonitorExternalRef,
|
||||
setIssueExecutionPolicyMonitorScheduledBy,
|
||||
} from "../services/issue-execution-policy.js";
|
||||
import type { PluginWorkerManager } from "../services/plugin-worker-manager.js";
|
||||
|
||||
|
|
@ -165,6 +167,53 @@ function summarizeIssueReferenceActivityDetails(input:
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Reports whether two execution policies carry the same monitor configuration.
 *
 * Only the `monitor` member of each policy is compared; a missing policy or a
 * missing monitor is treated as `null`. Both sides are serialized to a canonical
 * JSON form with object keys sorted, so monitors holding the same values compare
 * equal regardless of the order in which their properties were inserted. Array
 * element order is still significant.
 *
 * @param left - One policy, or `null` when there is none.
 * @param right - The other policy, or `null` when there is none.
 * @returns `true` when both monitors serialize to the same canonical JSON.
 */
function monitorPoliciesEqual(left: NormalizedExecutionPolicy | null, right: NormalizedExecutionPolicy | null) {
  // JSON.stringify emits keys in insertion order, so plain objects are rebuilt
  // with sorted keys before serialization. The replacer runs after `toJSON`, so
  // values such as dates are already strings by the time they reach it.
  const sortObjectKeys = (_key: string, value: unknown): unknown => {
    if (value === null || typeof value !== "object" || Array.isArray(value)) return value;
    const source = value as Record<string, unknown>;
    // Null-prototype target so a literal "__proto__" key stays an ordinary own property.
    const sorted: Record<string, unknown> = Object.create(null);
    for (const key of Object.keys(source).sort()) sorted[key] = source[key];
    return sorted;
  };
  const canonical = (policy: NormalizedExecutionPolicy | null) =>
    JSON.stringify(policy?.monitor ?? null, sortObjectKeys);
  return canonical(left) === canonical(right);
}
|
||||
|
||||
/**
 * Passes `policy` through `setIssueExecutionPolicyMonitorScheduledBy` with a
 * scheduler label derived from the acting party.
 *
 * @param policy - Normalized execution policy, or `null` when there is none.
 * @param actorType - `"user"` maps to the label `"board"`; `"agent"` maps to `"assignee"`.
 * @returns Whatever `setIssueExecutionPolicyMonitorScheduledBy` returns for that label.
 */
function applyActorMonitorScheduledBy(
  policy: NormalizedExecutionPolicy | null,
  actorType: "agent" | "user",
) {
  const scheduledBy = actorType === "user" ? "board" : "assignee";
  return setIssueExecutionPolicyMonitorScheduledBy(policy, scheduledBy);
}
|
||||
|
||||
/**
 * Guards monitor changes: only a board actor or the issue's assignee agent may make them.
 *
 * @param req - Incoming request; `req.actor` is inspected only when `monitorChanged` is true.
 * @param assigneeAgentId - Agent id the issue is assigned to, or `null` when unassigned.
 * @param monitorChanged - When `false` the check is skipped entirely.
 * @throws The error produced by `forbidden(...)` when the actor is neither a board
 *   actor nor the assignee agent.
 */
function assertCanManageIssueMonitor(req: Request, assigneeAgentId: string | null, monitorChanged: boolean) {
  // Nothing to authorize when the monitor is untouched.
  if (!monitorChanged) return;

  const { actor } = req;
  switch (actor.type) {
    case "board":
      return;
    case "agent":
      // An agent qualifies only when it has an id and that id is the issue's assignee.
      if (actor.agentId && actor.agentId === assigneeAgentId) return;
      break;
  }

  throw forbidden("Only the assignee agent or a board user can manage issue monitors");
}
|
||||
|
||||
/**
 * Flattens an issue's monitor information into one summary object.
 *
 * Three sources are consulted: the issue's own `monitor*` fields, the `monitor`
 * member of `policy`, and the `monitor` member of the execution state parsed from
 * `issue.executionState`. Each field uses the first source that yields a
 * non-nullish value, in the order written on its line below.
 *
 * @param issue - Issue fields relevant to the monitor; every field is optional.
 * @param policy - Normalized execution policy for the same issue, or `null`.
 * @returns The summary; timestamps taken from the issue are ISO strings, and
 *   `externalRef` is passed through `redactIssueMonitorExternalRef`.
 */
function summarizeIssueMonitor(
  issue: {
    monitorNextCheckAt?: Date | null;
    monitorLastTriggeredAt?: Date | null;
    monitorAttemptCount?: number | null;
    monitorNotes?: string | null;
    monitorScheduledBy?: string | null;
    executionState?: unknown;
  },
  policy: NormalizedExecutionPolicy | null,
) {
  const fromState = parseIssueExecutionState(issue.executionState)?.monitor;
  const fromPolicy = policy?.monitor;

  return {
    // Issue fields win for the schedule and the run counters.
    nextCheckAt: issue.monitorNextCheckAt?.toISOString() ?? fromPolicy?.nextCheckAt ?? null,
    lastTriggeredAt: issue.monitorLastTriggeredAt?.toISOString() ?? fromState?.lastTriggeredAt ?? null,
    attemptCount: issue.monitorAttemptCount ?? fromState?.attemptCount ?? 0,
    // Notes are the one field where the policy is consulted before the issue field.
    notes: fromPolicy?.notes ?? issue.monitorNotes ?? fromState?.notes ?? null,
    scheduledBy: issue.monitorScheduledBy ?? fromPolicy?.scheduledBy ?? fromState?.scheduledBy ?? null,
    // The remaining configuration comes from the policy, then the execution state.
    kind: fromPolicy?.kind ?? fromState?.kind ?? null,
    serviceName: fromPolicy?.serviceName ?? fromState?.serviceName ?? null,
    externalRef: redactIssueMonitorExternalRef(fromPolicy?.externalRef ?? fromState?.externalRef ?? null),
    timeoutAt: fromPolicy?.timeoutAt ?? fromState?.timeoutAt ?? null,
    maxAttempts: fromPolicy?.maxAttempts ?? fromState?.maxAttempts ?? null,
    recoveryPolicy: fromPolicy?.recoveryPolicy ?? fromState?.recoveryPolicy ?? null,
    // With no status in the state, a policy monitor is reported as "scheduled".
    status: fromState?.status ?? (fromPolicy ? "scheduled" : null),
    clearReason: fromState?.clearReason ?? null,
  };
}
|
||||
|
||||
/**
 * Derives a string key for an execution participant.
 *
 * @param participant - The participant to key.
 * @returns `agent:<agentId>` when `participant.type` is `"agent"`, otherwise `user:<userId>`.
 */
function activityExecutionParticipantKey(participant: ActivityExecutionParticipant): string {
  if (participant.type === "agent") {
    return `agent:${participant.agentId}`;
  }
  return `user:${participant.userId}`;
}
|
||||
|
|
@ -1812,7 +1861,11 @@ export function issueRoutes(
|
|||
await assertIssueEnvironmentSelection(companyId, req.body.executionWorkspaceSettings?.environmentId);
|
||||
|
||||
const actor = getActorInfo(req);
|
||||
const executionPolicy = normalizeIssueExecutionPolicy(req.body.executionPolicy);
|
||||
const executionPolicy = applyActorMonitorScheduledBy(
|
||||
normalizeIssueExecutionPolicy(req.body.executionPolicy),
|
||||
actor.actorType,
|
||||
);
|
||||
assertCanManageIssueMonitor(req, req.body.assigneeAgentId ?? null, Boolean(executionPolicy?.monitor));
|
||||
const issue = await svc.create(companyId, {
|
||||
...req.body,
|
||||
executionPolicy,
|
||||
|
|
@ -1847,6 +1900,29 @@ export function issueRoutes(
|
|||
},
|
||||
});
|
||||
|
||||
if (executionPolicy?.monitor) {
|
||||
await logActivity(db, {
|
||||
companyId,
|
||||
actorType: actor.actorType,
|
||||
actorId: actor.actorId,
|
||||
agentId: actor.agentId,
|
||||
runId: actor.runId,
|
||||
action: "issue.monitor_scheduled",
|
||||
entityType: "issue",
|
||||
entityId: issue.id,
|
||||
details: {
|
||||
identifier: issue.identifier,
|
||||
nextCheckAt: executionPolicy.monitor.nextCheckAt,
|
||||
notes: executionPolicy.monitor.notes,
|
||||
scheduledBy: executionPolicy.monitor.scheduledBy,
|
||||
serviceName: executionPolicy.monitor.serviceName ?? null,
|
||||
timeoutAt: executionPolicy.monitor.timeoutAt ?? null,
|
||||
maxAttempts: executionPolicy.monitor.maxAttempts ?? null,
|
||||
recoveryPolicy: executionPolicy.monitor.recoveryPolicy ?? null,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
void queueIssueAssignmentWakeup({
|
||||
heartbeat,
|
||||
issue,
|
||||
|
|
@ -1879,7 +1955,11 @@ export function issueRoutes(
|
|||
await assertIssueEnvironmentSelection(parent.companyId, req.body.executionWorkspaceSettings?.environmentId);
|
||||
|
||||
const actor = getActorInfo(req);
|
||||
const executionPolicy = normalizeIssueExecutionPolicy(req.body.executionPolicy);
|
||||
const executionPolicy = applyActorMonitorScheduledBy(
|
||||
normalizeIssueExecutionPolicy(req.body.executionPolicy),
|
||||
actor.actorType,
|
||||
);
|
||||
assertCanManageIssueMonitor(req, req.body.assigneeAgentId ?? null, Boolean(executionPolicy?.monitor));
|
||||
const { issue, parentBlockerAdded } = await svc.createChild(parent.id, {
|
||||
...req.body,
|
||||
executionPolicy,
|
||||
|
|
@ -1908,6 +1988,30 @@ export function issueRoutes(
|
|||
},
|
||||
});
|
||||
|
||||
if (executionPolicy?.monitor) {
|
||||
await logActivity(db, {
|
||||
companyId: parent.companyId,
|
||||
actorType: actor.actorType,
|
||||
actorId: actor.actorId,
|
||||
agentId: actor.agentId,
|
||||
runId: actor.runId,
|
||||
action: "issue.monitor_scheduled",
|
||||
entityType: "issue",
|
||||
entityId: issue.id,
|
||||
details: {
|
||||
identifier: issue.identifier,
|
||||
parentId: parent.id,
|
||||
nextCheckAt: executionPolicy.monitor.nextCheckAt,
|
||||
notes: executionPolicy.monitor.notes,
|
||||
scheduledBy: executionPolicy.monitor.scheduledBy,
|
||||
serviceName: executionPolicy.monitor.serviceName ?? null,
|
||||
timeoutAt: executionPolicy.monitor.timeoutAt ?? null,
|
||||
maxAttempts: executionPolicy.monitor.maxAttempts ?? null,
|
||||
recoveryPolicy: executionPolicy.monitor.recoveryPolicy ?? null,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
void queueIssueAssignmentWakeup({
|
||||
heartbeat,
|
||||
issue,
|
||||
|
|
@ -1921,6 +2025,27 @@ export function issueRoutes(
|
|||
res.status(201).json(issue);
|
||||
});
|
||||
|
||||
// POST /issues/:id/monitor/check-now
// Looks the issue up, responds 404 when it does not exist, enforces company access
// and monitor-management permission, then asks the heartbeat service to trigger the
// issue's monitor on behalf of the calling actor. Responds `{ ok: true }` on success.
router.post("/issues/:id/monitor/check-now", async (req, res) => {
  const issueId = req.params.id as string;
  const issue = await svc.getById(issueId);
  if (!issue) {
    res.status(404).json({ error: "Issue not found" });
    return;
  }

  assertCompanyAccess(req, issue.companyId);
  // Passing `true` means the permission check always runs for this endpoint.
  assertCanManageIssueMonitor(req, issue.assigneeAgentId, true);

  const { actorType, actorId, agentId, runId } = getActorInfo(req);
  await heartbeat.triggerIssueMonitor(issue.id, {
    actorType,
    actorId,
    agentId: agentId ?? null,
    runId: runId ?? null,
  });

  res.json({ ok: true });
});
|
||||
|
||||
router.patch("/issues/:id", validate(updateIssueRouteSchema), async (req, res) => {
|
||||
const id = req.params.id as string;
|
||||
const existing = await svc.getById(id);
|
||||
|
|
@ -2043,7 +2168,10 @@ export function issueRoutes(
|
|||
updateFields.status = "todo";
|
||||
}
|
||||
if (req.body.executionPolicy !== undefined) {
|
||||
updateFields.executionPolicy = normalizeIssueExecutionPolicy(req.body.executionPolicy);
|
||||
updateFields.executionPolicy = applyActorMonitorScheduledBy(
|
||||
normalizeIssueExecutionPolicy(req.body.executionPolicy),
|
||||
actor.actorType,
|
||||
);
|
||||
}
|
||||
const previousExecutionPolicy = normalizeIssueExecutionPolicy(existing.executionPolicy ?? null);
|
||||
const nextExecutionPolicy =
|
||||
|
|
@ -2053,10 +2181,13 @@ export function issueRoutes(
|
|||
if (normalizedAssigneeAgentId !== undefined) {
|
||||
updateFields.assigneeAgentId = normalizedAssigneeAgentId;
|
||||
}
|
||||
const monitorChanged = monitorPoliciesEqual(previousExecutionPolicy, nextExecutionPolicy) === false;
|
||||
assertCanManageIssueMonitor(req, existing.assigneeAgentId, req.body.executionPolicy !== undefined && monitorChanged);
|
||||
|
||||
const transition = applyIssueExecutionPolicyTransition({
|
||||
issue: existing,
|
||||
policy: nextExecutionPolicy,
|
||||
previousPolicy: previousExecutionPolicy,
|
||||
requestedStatus: typeof updateFields.status === "string" ? updateFields.status : undefined,
|
||||
requestedAssigneePatch: {
|
||||
assigneeAgentId: normalizedAssigneeAgentId,
|
||||
|
|
@ -2069,6 +2200,7 @@ export function issueRoutes(
|
|||
},
|
||||
commentBody,
|
||||
reviewRequest: reviewRequest === undefined ? undefined : reviewRequest,
|
||||
monitorExplicitlyUpdated: req.body.executionPolicy !== undefined && monitorChanged,
|
||||
});
|
||||
const decisionId = transition.decision ? randomUUID() : null;
|
||||
if (decisionId) {
|
||||
|
|
@ -2372,6 +2504,51 @@ export function issueRoutes(
|
|||
});
|
||||
}
|
||||
|
||||
const nextStoredExecutionPolicy = normalizeIssueExecutionPolicy(issue.executionPolicy ?? null);
|
||||
const previousMonitor = summarizeIssueMonitor(existing, previousExecutionPolicy);
|
||||
const nextMonitor = summarizeIssueMonitor(issue, nextStoredExecutionPolicy);
|
||||
const monitorScheduledChanged = previousMonitor.nextCheckAt !== nextMonitor.nextCheckAt;
|
||||
if (nextMonitor.nextCheckAt && (monitorScheduledChanged || previousMonitor.notes !== nextMonitor.notes)) {
|
||||
await logActivity(db, {
|
||||
companyId: issue.companyId,
|
||||
actorType: actor.actorType,
|
||||
actorId: actor.actorId,
|
||||
agentId: actor.agentId,
|
||||
runId: actor.runId,
|
||||
action: "issue.monitor_scheduled",
|
||||
entityType: "issue",
|
||||
entityId: issue.id,
|
||||
details: {
|
||||
identifier: issue.identifier,
|
||||
nextCheckAt: nextMonitor.nextCheckAt,
|
||||
previousNextCheckAt: previousMonitor.nextCheckAt,
|
||||
notes: nextMonitor.notes,
|
||||
scheduledBy: nextMonitor.scheduledBy,
|
||||
serviceName: nextMonitor.serviceName,
|
||||
timeoutAt: nextMonitor.timeoutAt,
|
||||
maxAttempts: nextMonitor.maxAttempts,
|
||||
recoveryPolicy: nextMonitor.recoveryPolicy,
|
||||
},
|
||||
});
|
||||
} else if (!nextMonitor.nextCheckAt && previousMonitor.nextCheckAt) {
|
||||
await logActivity(db, {
|
||||
companyId: issue.companyId,
|
||||
actorType: actor.actorType,
|
||||
actorId: actor.actorId,
|
||||
agentId: actor.agentId,
|
||||
runId: actor.runId,
|
||||
action: "issue.monitor_cleared",
|
||||
entityType: "issue",
|
||||
entityId: issue.id,
|
||||
details: {
|
||||
identifier: issue.identifier,
|
||||
previousNextCheckAt: previousMonitor.nextCheckAt,
|
||||
reason: nextMonitor.clearReason ?? "manual",
|
||||
notes: previousMonitor.notes,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
if (issue.status === "done" && existing.status !== "done") {
|
||||
const tc = getTelemetryClient();
|
||||
if (tc && actor.agentId) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue