mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-16 02:40:39 +09:00
Guard cheap recovery model usage (#6371)
## Thinking Path

> - Paperclip is the control plane that coordinates AI-agent work through issues, heartbeats, comments, approvals, and auditable recovery paths.
> - The affected subsystem is heartbeat/recovery orchestration, especially the optional cheap model profile used for operational recovery overhead.
> - Cheap recovery should repair status and liveness, but it must not become the worker lane that writes deliverables, continues source work, or propagates cheap execution hints into downstream retries.
> - The gap was that cheap-profile hints could follow recovery wake contexts and assignment overrides farther than intended, making real work eligible to run on the cheap model.
> - This pull request separates status-only cheap recovery from normal source-work continuations, adds route guards for deliverable mutations during cheap status-only runs, and documents the invariant.
> - The benefit is safer retry/recovery behavior: cheap runs can clean up control-plane state, while any remaining source work resumes through a normal/original model path.

## What Changed

- Added recovery model-profile work classes so status-only recovery carries explicit guard context and normal-model continuations scrub cheap hints.
- Updated heartbeat, productivity review, liveness continuation, and recovery service wakeups to request cheap only for bounded status-only recovery work.
- Blocked cheap status-only recovery runs from writing issue documents, plans, attachments, work products, or assigning downstream work back to `modelProfile: "cheap"`.
- Added/updated server tests for cheap profile propagation, artifact/document guards, route authorization, retry scheduling, and successful-run handoff behavior.
- Documented the recovery model-profile lane in `doc/SPEC-implementation.md` and `doc/execution-semantics.md`.
- After rebasing onto current `public-gh/master`, stabilized the new `InstanceSidebar` plugin-filter tests so the PR check lane stays green.
## Verification

- Local: `pnpm exec vitest run --config vitest.config.ts src/services/recovery/model-profile-hint.test.ts src/__tests__/issue-agent-mutation-ownership-routes.test.ts src/__tests__/issue-document-restore-routes.test.ts` from `server/` - 3 files, 37 tests passed after final edits.
- Local: `pnpm exec vitest run --config vitest.config.ts src/__tests__/heartbeat-process-recovery.test.ts` from `server/` - 44 tests passed after rerunning the cleanup-sensitive file alone.
- Local: `pnpm --filter @paperclipai/ui exec vitest run src/components/InstanceSidebar.test.tsx` - 4 tests passed.
- Local: `pnpm --filter @paperclipai/server typecheck` - passed.
- Local: `pnpm --filter @paperclipai/ui typecheck` - passed.
- PR checks on latest head `6f8c3b1380f5bd872c6f49f6f7188ecf3bb6d263` - all green, including `verify`, build, typecheck, server/general/serialized tests, e2e, Snyk, and policy.
- Greptile: pass 3 returned Confidence Score 5/5 with zero unresolved Greptile review threads.

## Risks

- Medium risk: recovery behavior is intentionally stricter, so any path that incorrectly relies on cheap recovery to keep doing source work will now need to hand back to a normal-model run.
- Low migration risk: no schema changes.
- No product UI changes; the UI file touched is a test-only stabilization after rebasing onto current `master`.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex coding agent, GPT-5 model family (`gpt-5`), tool use and local code execution enabled; context window not exposed in this environment.
## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after screenshots (N/A: no product UI changes)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before requesting merge
This commit is contained in:
parent
24748de421
commit
bfe6369ef5
17 changed files with 529 additions and 78 deletions
|
|
@ -2785,7 +2785,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
|
|||
projectId: input.claimed.projectId,
|
||||
goalId: input.claimed.goalId,
|
||||
assigneeAgentId: input.claimed.assigneeAgentId,
|
||||
assigneeAdapterOverrides: recoveryAssigneeAdapterOverrides(),
|
||||
assigneeAdapterOverrides: recoveryAssigneeAdapterOverrides("status_only"),
|
||||
originKind: RECOVERY_ORIGIN_KINDS.strandedIssueRecovery,
|
||||
originId: input.claimed.id,
|
||||
originFingerprint: `issue_monitor:${input.clearReason}`,
|
||||
|
|
@ -2799,7 +2799,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
|
|||
triggerDetail: "system",
|
||||
reason: "issue_monitor_recovery_issue",
|
||||
idempotencyKey: `issue-monitor-recovery-issue:${input.claimed.id}:${input.clearReason}:${input.scheduledAtIso}`,
|
||||
payload: withRecoveryModelProfileHint({ issueId: recoveryIssue.id, sourceIssueId: input.claimed.id }),
|
||||
payload: withRecoveryModelProfileHint({ issueId: recoveryIssue.id, sourceIssueId: input.claimed.id }, "status_only"),
|
||||
requestedByActorType: input.actorType,
|
||||
requestedByActorId: input.actorId,
|
||||
contextSnapshot: withRecoveryModelProfileHint({
|
||||
|
|
@ -2807,7 +2807,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
|
|||
sourceIssueId: input.claimed.id,
|
||||
source: "issue.monitor.recovery_issue",
|
||||
wakeReason: "issue_monitor_recovery_issue",
|
||||
}),
|
||||
}, "status_only"),
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -2868,7 +2868,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
|
|||
serviceName: input.monitor?.serviceName ?? null,
|
||||
timeoutAt: input.monitor?.timeoutAt ?? null,
|
||||
maxAttempts: input.monitor?.maxAttempts ?? null,
|
||||
}),
|
||||
}, "status_only"),
|
||||
requestedByActorType: input.actorType,
|
||||
requestedByActorId: input.actorId,
|
||||
contextSnapshot: withRecoveryModelProfileHint({
|
||||
|
|
@ -2881,7 +2881,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
|
|||
serviceName: input.monitor?.serviceName ?? null,
|
||||
timeoutAt: input.monitor?.timeoutAt ?? null,
|
||||
maxAttempts: input.monitor?.maxAttempts ?? null,
|
||||
}),
|
||||
}, "status_only"),
|
||||
});
|
||||
|
||||
await logActivity(db, {
|
||||
|
|
@ -4535,7 +4535,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
|
|||
wakeReason: "missing_issue_comment",
|
||||
retryReason: "missing_issue_comment",
|
||||
missingIssueCommentForRunId: run.id,
|
||||
});
|
||||
}, "status_only");
|
||||
const now = new Date();
|
||||
|
||||
const retryRun = await db.transaction(async (tx) => {
|
||||
|
|
@ -4562,7 +4562,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
|
|||
issueId,
|
||||
retryOfRunId: run.id,
|
||||
retryReason: "missing_issue_comment",
|
||||
}),
|
||||
}, "status_only"),
|
||||
status: "queued",
|
||||
requestedByActorType: "system",
|
||||
requestedByActorId: null,
|
||||
|
|
@ -4755,7 +4755,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
|
|||
retryOfRunId: run.id,
|
||||
wakeReason: "process_lost_retry",
|
||||
retryReason: "process_lost",
|
||||
});
|
||||
}, "normal_model");
|
||||
|
||||
const queued = await db.transaction(async (tx) => {
|
||||
const wakeupRequest = await tx
|
||||
|
|
@ -4769,7 +4769,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
|
|||
payload: withRecoveryModelProfileHint({
|
||||
...(issueId ? { issueId } : {}),
|
||||
retryOfRunId: run.id,
|
||||
}),
|
||||
}, "normal_model"),
|
||||
status: "queued",
|
||||
requestedByActorType: "system",
|
||||
requestedByActorId: null,
|
||||
|
|
@ -5322,7 +5322,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
|
|||
scheduledRetryAt: schedule.dueAt.toISOString(),
|
||||
...(transientRetryNotBefore ? { transientRetryNotBefore: transientRetryNotBefore.toISOString() } : {}),
|
||||
...(codexTransientFallbackMode ? { codexTransientFallbackMode } : {}),
|
||||
});
|
||||
}, "normal_model");
|
||||
const maxTurnContinuationIdempotencyKey = retryReason === MAX_TURN_CONTINUATION_RETRY_REASON
|
||||
? `max-turn-continuation:${run.companyId}:${issueId ?? "no-issue"}:${run.id}:${schedule.attempt}`
|
||||
: null;
|
||||
|
|
@ -5492,7 +5492,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
|
|||
scheduledRetryAt: schedule.dueAt.toISOString(),
|
||||
...(transientRetryNotBefore ? { transientRetryNotBefore: transientRetryNotBefore.toISOString() } : {}),
|
||||
...(codexTransientFallbackMode ? { codexTransientFallbackMode } : {}),
|
||||
}),
|
||||
}, "normal_model"),
|
||||
status: "queued",
|
||||
requestedByActorType: "system",
|
||||
requestedByActorId: null,
|
||||
|
|
@ -8562,7 +8562,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
|
|||
payload: withRecoveryModelProfileHint({
|
||||
issueId: issue.id,
|
||||
retryOfRunId: run.id,
|
||||
}),
|
||||
}, "normal_model"),
|
||||
status: "queued",
|
||||
requestedByActorType: "system",
|
||||
requestedByActorId: null,
|
||||
|
|
@ -8587,7 +8587,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
|
|||
retryReason,
|
||||
source: recoverySource,
|
||||
retryOfRunId: run.id,
|
||||
}),
|
||||
}, "normal_model"),
|
||||
sessionIdBefore: recoverySessionBefore,
|
||||
retryOfRunId: run.id,
|
||||
updatedAt: now,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue