mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-19 20:10:39 +09:00
[codex] Add issue monitor liveness controls (#4988)
## Thinking Path

> - Paperclip is a control plane for autonomous AI companies where work must stay observable, governable, and recoverable.
> - The task/heartbeat subsystem owns agent execution continuity, issue state transitions, and visible recovery behavior.
> - Waiting on an external service is not the same as being blocked when the assignee still owns a future check.
> - The gap was that agents had no first-class one-shot monitor state for external-service waits, so recovery could look stalled or require ad hoc comments.
> - This pull request adds bounded issue monitors that can wake the owner, clear exhausted waits, and produce explicit recovery behavior.
> - It also surfaces monitor status in the board UI and documents when to use monitors versus `blocked`.
> - The benefit is clearer liveness semantics for asynchronous waits without weakening single-assignee task ownership.

## What Changed

- Added issue monitor fields, shared types, validators, constants, and an idempotent `0075` migration for scheduled monitor state.
- Added server-side monitor scheduling, dispatch, recovery bounds, activity logging, and external-ref redaction.
- Added board/agent route coverage for monitor permissions and child monitor scheduling.
- Added issue detail/property UI for monitor state, a monitor activity card, and Storybook stories for review surfaces.
- Documented monitor semantics and recovery policy behavior in `doc/execution-semantics.md`.
- Addressed Greptile review feedback by preserving monitor state in skipped-stage builders and making board monitor saves send `scheduledBy: "board"`.
## Verification

- `pnpm install --frozen-lockfile`
- `pnpm run preflight:workspace-links && pnpm exec vitest run server/src/__tests__/issue-execution-policy-routes.test.ts server/src/__tests__/issue-execution-policy.test.ts server/src/__tests__/issue-monitor-scheduler.test.ts server/src/__tests__/recovery-classifiers.test.ts ui/src/components/IssueMonitorActivityCard.test.tsx ui/src/components/IssueProperties.test.tsx ui/src/lib/activity-format.test.ts`
  - First run passed 5 files and failed to collect 2 server suites because the worktree was missing the optional `acpx/runtime` dependency.
  - After `pnpm install --frozen-lockfile`, reran the 2 failed suites successfully.
- `pnpm exec vitest run server/src/__tests__/issue-monitor-scheduler.test.ts server/src/__tests__/recovery-classifiers.test.ts`
- `pnpm --filter @paperclipai/shared typecheck && pnpm --filter @paperclipai/db typecheck && pnpm --filter @paperclipai/server typecheck && pnpm --filter @paperclipai/ui typecheck`
- `pnpm exec vitest run server/src/__tests__/issue-execution-policy.test.ts ui/src/components/IssueProperties.test.tsx`
- `pnpm --filter @paperclipai/server typecheck && pnpm --filter @paperclipai/ui typecheck`
- `pnpm exec vitest run ui/src/components/IssueMonitorActivityCard.test.tsx ui/src/components/IssueProperties.test.tsx`
- `pnpm --filter @paperclipai/ui typecheck`
- Storybook screenshot captured from `http://127.0.0.1:6006/iframe.html?viewMode=story&id=product-issue-monitor-surfaces--monitor-surfaces` with Playwright.

## Screenshots

## Risks

- Medium: this changes heartbeat recovery behavior for scheduled external-service waits, so regressions could affect wake timing or recovery issue creation.
- Migration risk is reduced by using `IF NOT EXISTS` for the new issue monitor columns and index.
- External monitor references are treated as secret-adjacent and are intentionally omitted from visible activity/wake payloads.
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, GPT-5 coding agent with repository tool use and terminal execution.

## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after screenshots or Storybook review surfaces
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
parent
76f09c8eb6
commit
57229d0f24
32 changed files with 19324 additions and 20 deletions
|
|
@ -22,7 +22,10 @@ export interface IssueLivenessIssueInput {
|
|||
assigneeUserId?: string | null;
|
||||
createdByAgentId?: string | null;
|
||||
createdByUserId?: string | null;
|
||||
executionPolicy?: Record<string, unknown> | null;
|
||||
executionState?: Record<string, unknown> | null;
|
||||
monitorNextCheckAt?: Date | string | null;
|
||||
monitorAttemptCount?: number | null;
|
||||
}
|
||||
|
||||
export interface IssueLivenessRelationInput {
|
||||
|
|
@ -99,6 +102,7 @@ export interface IssueGraphLivenessInput {
|
|||
pendingInteractions?: IssueLivenessWaitingPathInput[];
|
||||
pendingApprovals?: IssueLivenessWaitingPathInput[];
|
||||
openRecoveryIssues?: IssueLivenessWaitingPathInput[];
|
||||
now?: Date | string;
|
||||
}
|
||||
|
||||
const INVOKABLE_AGENT_STATUSES = new Set(["active", "idle", "running", "error"]);
|
||||
|
|
@ -140,6 +144,45 @@ function hasWaitingPath(
|
|||
return waitingPaths.some((entry) => entry.companyId === companyId && entry.issueId === issueId);
|
||||
}
|
||||
|
||||
/**
 * Narrows an arbitrary value to a plain key/value record.
 *
 * @param value - Any value.
 * @returns The same reference typed as a record when `value` is a non-null,
 *   non-array object; otherwise `null`.
 */
function readRecord(value: unknown): Record<string, unknown> | null {
  // Arrays are objects too, but they are never treated as records here.
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return null;
  }
  return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Accepts a value only if it is a strictly positive integer.
 *
 * @param value - Any value.
 * @returns `value` itself when it is a `number`, an integer, and greater than
 *   zero; otherwise `null` (this includes `0`, negatives, fractions, `NaN`,
 *   infinities, and numeric strings).
 */
function readPositiveInteger(value: unknown): number | null {
  if (typeof value !== "number") return null;
  return Number.isInteger(value) && value > 0 ? value : null;
}
|
||||
|
||||
/**
 * Converts a `Date` or date string into epoch milliseconds.
 *
 * @param value - Any value; only `Date` instances and strings are considered.
 * @returns The timestamp in milliseconds, or `null` when `value` is neither a
 *   string nor a `Date`, or when it does not represent a valid date.
 */
function readDateMs(value: unknown): number | null {
  let date: Date;
  if (value instanceof Date) {
    date = value;
  } else if (typeof value === "string") {
    date = new Date(value);
  } else {
    return null;
  }

  // An invalid Date reports NaN from getTime(); surface that as null.
  const time = date.getTime();
  return Number.isNaN(time) ? null : time;
}
|
||||
|
||||
/**
 * Extracts the `monitor` sub-records from an issue's execution policy and
 * execution state.
 *
 * @param issue - Issue whose `executionPolicy` and `executionState` are read.
 * @returns `policyMonitor` and `stateMonitor`; each is `null` when its parent
 *   is not a record or its `monitor` property is not a record.
 */
function monitorFromIssue(issue: IssueLivenessIssueInput): {
  policyMonitor: Record<string, unknown> | null;
  stateMonitor: Record<string, unknown> | null;
} {
  const policyMonitor = readRecord(readRecord(issue.executionPolicy)?.monitor);
  const stateMonitor = readRecord(readRecord(issue.executionState)?.monitor);
  return { policyMonitor, stateMonitor };
}
|
||||
|
||||
/**
 * Reports whether an issue has a monitor check scheduled strictly after
 * `nowMs` that has not hit its timeout or attempt limit.
 *
 * @param issue - Issue whose `monitorNextCheckAt`, `monitorAttemptCount`,
 *   `executionPolicy.monitor`, and `executionState.monitor` are inspected.
 * @param nowMs - Reference time in epoch milliseconds.
 * @returns `false` when the next check is missing, unparseable, or not in the
 *   future; when a parseable `timeoutAt` is at or before `nowMs`; or when a
 *   positive-integer `maxAttempts` has been reached. Otherwise `true`.
 */
function hasScheduledMonitor(issue: IssueLivenessIssueInput, nowMs: number): boolean {
  const nextCheckAtMs = readDateMs(issue.monitorNextCheckAt);
  if (nextCheckAtMs === null || nextCheckAtMs <= nowMs) return false;

  const { policyMonitor, stateMonitor } = monitorFromIssue(issue);

  // The policy value wins; the state value is used only when the policy value
  // is null or undefined.
  const timeoutAtMs = readDateMs(policyMonitor?.timeoutAt ?? stateMonitor?.timeoutAt);
  if (timeoutAtMs !== null && timeoutAtMs <= nowMs) return false;

  const maxAttempts = readPositiveInteger(policyMonitor?.maxAttempts ?? stateMonitor?.maxAttempts);
  // The issue-level counter takes precedence; the state counter defaults to 0
  // when it is not a positive integer.
  const stateAttemptCount = readPositiveInteger(stateMonitor?.attemptCount) ?? 0;
  const attemptCount = issue.monitorAttemptCount ?? stateAttemptCount;
  if (maxAttempts !== null && attemptCount >= maxAttempts) return false;

  return true;
}
|
||||
|
||||
function readPrincipalAgentId(principal: unknown): string | null {
|
||||
if (!principal || typeof principal !== "object") return null;
|
||||
const value = principal as Record<string, unknown>;
|
||||
|
|
@ -308,6 +351,7 @@ function finding(input: {
|
|||
}
|
||||
|
||||
export function classifyIssueGraphLiveness(input: IssueGraphLivenessInput): IssueLivenessFinding[] {
|
||||
const nowMs = readDateMs(input.now ?? new Date()) ?? Date.now();
|
||||
const issuesById = new Map(input.issues.map((issue) => [issue.id, issue]));
|
||||
const agentsById = new Map(input.agents.map((agent) => [agent.id, agent]));
|
||||
const blockersByBlockedIssueId = new Map<string, IssueLivenessRelationInput[]>();
|
||||
|
|
@ -351,6 +395,7 @@ export function classifyIssueGraphLiveness(input: IssueGraphLivenessInput): Issu
|
|||
|
||||
function hasExplicitWaitingPath(issue: IssueLivenessIssueInput) {
|
||||
return Boolean(issue.assigneeUserId) ||
|
||||
hasScheduledMonitor(issue, nowMs) ||
|
||||
hasActiveExecutionPath(issue.companyId, issue.id, activeRuns, queuedWakeRequests) ||
|
||||
hasWaitingPath(issue.companyId, issue.id, pendingInteractions) ||
|
||||
hasWaitingPath(issue.companyId, issue.id, pendingApprovals) ||
|
||||
|
|
|
|||
|
|
@ -1836,7 +1836,10 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
|
|||
assigneeUserId: issues.assigneeUserId,
|
||||
createdByAgentId: issues.createdByAgentId,
|
||||
createdByUserId: issues.createdByUserId,
|
||||
executionPolicy: issues.executionPolicy,
|
||||
executionState: issues.executionState,
|
||||
monitorNextCheckAt: issues.monitorNextCheckAt,
|
||||
monitorAttemptCount: issues.monitorAttemptCount,
|
||||
})
|
||||
.from(issues)
|
||||
.where(
|
||||
|
|
@ -1966,6 +1969,7 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
|
|||
pendingInteractions: interactionRows,
|
||||
pendingApprovals: approvalRows,
|
||||
openRecoveryIssues,
|
||||
now: new Date(),
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue